diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..df9efad
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,116 @@
+# Initially taken from GitHub's Python gitignore file
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a Python script from a template
+# before PyInstaller builds the exe, so as to inject date/other info into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..cf30ded
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,5 @@
+
+# Default ignored files
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources.local.xml
\ No newline at end of file
diff --git a/.idea/bert.iml b/.idea/bert.iml
new file mode 100644
index 0000000..6a3f7ec
--- /dev/null
+++ b/.idea/bert.iml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml
new file mode 100644
index 0000000..eec2c94
--- /dev/null
+++ b/.idea/dataSources.xml
@@ -0,0 +1,11 @@
+
+
+
+
+ sqlite.xerial
+ true
+ org.sqlite.JDBC
+ jdbc:sqlite:C:\Users\Administrator\Documents\GitHub\bert\bptdata.db
+
+
+
\ No newline at end of file
diff --git a/.idea/dictionaries/Administrator.xml b/.idea/dictionaries/Administrator.xml
new file mode 100644
index 0000000..b033f19
--- /dev/null
+++ b/.idea/dictionaries/Administrator.xml
@@ -0,0 +1,22 @@
+<component name="ProjectDictionaryState">
+  <dictionary name="Administrator">
+    <words>
+      <w>amki</w>
+      <w>asctime</w>
+      <w>badrequest</w>
+      <w>bptdata</w>
+      <w>codedream</w>
+      <w>epaper</w>
+      <w>epout</w>
+      <w>eppdt</w>
+      <w>eppdtout</w>
+      <w>eppredict</w>
+      <w>idcode</w>
+      <w>levelname</w>
+      <w>nlpdata</w>
+      <w>sckstn</w>
+      <w>stnid</w>
+      <w>stns</w>
+    </words>
+  </dictionary>
+</component>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..fb94267
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..e84c31f
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/other.xml b/.idea/other.xml
new file mode 100644
index 0000000..640fd80
--- /dev/null
+++ b/.idea/other.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/sqldialects.xml b/.idea/sqldialects.xml
new file mode 100644
index 0000000..5b66d9a
--- /dev/null
+++ b/.idea/sqldialects.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..effb57b
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,15 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/bptdata.db b/bptdata.db
new file mode 100644
index 0000000..7e4b91d
Binary files /dev/null and b/bptdata.db differ
diff --git a/chinese_wwm_ext_L-12_H-768_A-12/bert_config.json b/chinese_wwm_ext_L-12_H-768_A-12/bert_config.json
new file mode 100644
index 0000000..adb75ff
--- /dev/null
+++ b/chinese_wwm_ext_L-12_H-768_A-12/bert_config.json
@@ -0,0 +1,19 @@
+{
+ "attention_probs_dropout_prob": 0.1,
+ "directionality": "bidi",
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "max_position_embeddings": 512,
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pooler_fc_size": 768,
+ "pooler_num_attention_heads": 12,
+ "pooler_num_fc_layers": 3,
+ "pooler_size_per_head": 128,
+ "pooler_type": "first_token_transform",
+ "type_vocab_size": 2,
+ "vocab_size": 21128
+}
diff --git a/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.index b/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.index
new file mode 100644
index 0000000..8c80a01
Binary files /dev/null and b/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.index differ
diff --git a/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.meta b/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.meta
new file mode 100644
index 0000000..7b374b5
Binary files /dev/null and b/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.meta differ
diff --git a/chinese_wwm_ext_L-12_H-768_A-12/vocab.txt b/chinese_wwm_ext_L-12_H-768_A-12/vocab.txt
new file mode 100644
index 0000000..ca4f978
--- /dev/null
+++ b/chinese_wwm_ext_L-12_H-768_A-12/vocab.txt
@@ -0,0 +1,21128 @@
+[PAD]
+[unused1]
+[unused2]
+[unused3]
+[unused4]
+[unused5]
+[unused6]
+[unused7]
+[unused8]
+[unused9]
+[unused10]
+[unused11]
+[unused12]
+[unused13]
+[unused14]
+[unused15]
+[unused16]
+[unused17]
+[unused18]
+[unused19]
+[unused20]
+[unused21]
+[unused22]
+[unused23]
+[unused24]
+[unused25]
+[unused26]
+[unused27]
+[unused28]
+[unused29]
+[unused30]
+[unused31]
+[unused32]
+[unused33]
+[unused34]
+[unused35]
+[unused36]
+[unused37]
+[unused38]
+[unused39]
+[unused40]
+[unused41]
+[unused42]
+[unused43]
+[unused44]
+[unused45]
+[unused46]
+[unused47]
+[unused48]
+[unused49]
+[unused50]
+[unused51]
+[unused52]
+[unused53]
+[unused54]
+[unused55]
+[unused56]
+[unused57]
+[unused58]
+[unused59]
+[unused60]
+[unused61]
+[unused62]
+[unused63]
+[unused64]
+[unused65]
+[unused66]
+[unused67]
+[unused68]
+[unused69]
+[unused70]
+[unused71]
+[unused72]
+[unused73]
+[unused74]
+[unused75]
+[unused76]
+[unused77]
+[unused78]
+[unused79]
+[unused80]
+[unused81]
+[unused82]
+[unused83]
+[unused84]
+[unused85]
+[unused86]
+[unused87]
+[unused88]
+[unused89]
+[unused90]
+[unused91]
+[unused92]
+[unused93]
+[unused94]
+[unused95]
+[unused96]
+[unused97]
+[unused98]
+[unused99]
+[UNK]
+[CLS]
+[SEP]
+[MASK]
+<S>
+<T>
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+^
+_
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+£
+¤
+¥
+§
+©
+«
+®
+°
+±
+²
+³
+µ
+·
+¹
+º
+»
+¼
+×
+ß
+æ
+÷
+ø
+đ
+ŋ
+ɔ
+ə
+ɡ
+ʰ
+ˇ
+ˈ
+ˊ
+ˋ
+ˍ
+ː
+˙
+˚
+ˢ
+α
+β
+γ
+δ
+ε
+η
+θ
+ι
+κ
+λ
+μ
+ν
+ο
+π
+ρ
+ς
+σ
+τ
+υ
+φ
+χ
+ψ
+ω
+а
+б
+в
+г
+д
+е
+ж
+з
+и
+к
+л
+м
+н
+о
+п
+р
+с
+т
+у
+ф
+х
+ц
+ч
+ш
+ы
+ь
+я
+і
+ا
+ب
+ة
+ت
+د
+ر
+س
+ع
+ل
+م
+ن
+ه
+و
+ي
+۩
+ก
+ง
+น
+ม
+ย
+ร
+อ
+า
+เ
+๑
+་
+ღ
+ᄀ
+ᄁ
+ᄂ
+ᄃ
+ᄅ
+ᄆ
+ᄇ
+ᄈ
+ᄉ
+ᄋ
+ᄌ
+ᄎ
+ᄏ
+ᄐ
+ᄑ
+ᄒ
+ᅡ
+ᅢ
+ᅣ
+ᅥ
+ᅦ
+ᅧ
+ᅨ
+ᅩ
+ᅪ
+ᅬ
+ᅭ
+ᅮ
+ᅯ
+ᅲ
+ᅳ
+ᅴ
+ᅵ
+ᆨ
+ᆫ
+ᆯ
+ᆷ
+ᆸ
+ᆺ
+ᆻ
+ᆼ
+ᗜ
+ᵃ
+ᵉ
+ᵍ
+ᵏ
+ᵐ
+ᵒ
+ᵘ
+‖
+„
+†
+•
+‥
+‧
+
+‰
+′
+″
+‹
+›
+※
+‿
+⁄
+ⁱ
+⁺
+ⁿ
+₁
+₂
+₃
+₄
+€
+℃
+№
+™
+ⅰ
+ⅱ
+ⅲ
+ⅳ
+ⅴ
+←
+↑
+→
+↓
+↔
+↗
+↘
+⇒
+∀
+−
+∕
+∙
+√
+∞
+∟
+∠
+∣
+∥
+∩
+∮
+∶
+∼
+∽
+≈
+≒
+≡
+≤
+≥
+≦
+≧
+≪
+≫
+⊙
+⋅
+⋈
+⋯
+⌒
+①
+②
+③
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+⑴
+⑵
+⑶
+⑷
+⑸
+⒈
+⒉
+⒊
+⒋
+ⓒ
+ⓔ
+ⓘ
+─
+━
+│
+┃
+┅
+┆
+┊
+┌
+└
+├
+┣
+═
+║
+╚
+╞
+╠
+╭
+╮
+╯
+╰
+╱
+╳
+▂
+▃
+▅
+▇
+█
+▉
+▋
+▌
+▍
+▎
+■
+□
+▪
+▫
+▬
+▲
+△
+▶
+►
+▼
+▽
+◆
+◇
+○
+◎
+●
+◕
+◠
+◢
+◤
+☀
+★
+☆
+☕
+☞
+☺
+☼
+♀
+♂
+♠
+♡
+♣
+♥
+♦
+♪
+♫
+♬
+✈
+✔
+✕
+✖
+✦
+✨
+✪
+✰
+✿
+❀
+❤
+➜
+➤
+⦿
+、
+。
+〃
+々
+〇
+〈
+〉
+《
+》
+「
+」
+『
+』
+【
+】
+〓
+〔
+〕
+〖
+〗
+〜
+〝
+〞
+ぁ
+あ
+ぃ
+い
+う
+ぇ
+え
+お
+か
+き
+く
+け
+こ
+さ
+し
+す
+せ
+そ
+た
+ち
+っ
+つ
+て
+と
+な
+に
+ぬ
+ね
+の
+は
+ひ
+ふ
+へ
+ほ
+ま
+み
+む
+め
+も
+ゃ
+や
+ゅ
+ゆ
+ょ
+よ
+ら
+り
+る
+れ
+ろ
+わ
+を
+ん
+゜
+ゝ
+ァ
+ア
+ィ
+イ
+ゥ
+ウ
+ェ
+エ
+ォ
+オ
+カ
+キ
+ク
+ケ
+コ
+サ
+シ
+ス
+セ
+ソ
+タ
+チ
+ッ
+ツ
+テ
+ト
+ナ
+ニ
+ヌ
+ネ
+ノ
+ハ
+ヒ
+フ
+ヘ
+ホ
+マ
+ミ
+ム
+メ
+モ
+ャ
+ヤ
+ュ
+ユ
+ョ
+ヨ
+ラ
+リ
+ル
+レ
+ロ
+ワ
+ヲ
+ン
+ヶ
+・
+ー
+ヽ
+ㄅ
+ㄆ
+ㄇ
+ㄉ
+ㄋ
+ㄌ
+ㄍ
+ㄎ
+ㄏ
+ㄒ
+ㄚ
+ㄛ
+ㄞ
+ㄟ
+ㄢ
+ㄤ
+ㄥ
+ㄧ
+ㄨ
+ㆍ
+㈦
+㊣
+㎡
+㗎
+一
+丁
+七
+万
+丈
+三
+上
+下
+不
+与
+丐
+丑
+专
+且
+丕
+世
+丘
+丙
+业
+丛
+东
+丝
+丞
+丟
+両
+丢
+两
+严
+並
+丧
+丨
+个
+丫
+中
+丰
+串
+临
+丶
+丸
+丹
+为
+主
+丼
+丽
+举
+丿
+乂
+乃
+久
+么
+义
+之
+乌
+乍
+乎
+乏
+乐
+乒
+乓
+乔
+乖
+乗
+乘
+乙
+乜
+九
+乞
+也
+习
+乡
+书
+乩
+买
+乱
+乳
+乾
+亀
+亂
+了
+予
+争
+事
+二
+于
+亏
+云
+互
+五
+井
+亘
+亙
+亚
+些
+亜
+亞
+亟
+亡
+亢
+交
+亥
+亦
+产
+亨
+亩
+享
+京
+亭
+亮
+亲
+亳
+亵
+人
+亿
+什
+仁
+仃
+仄
+仅
+仆
+仇
+今
+介
+仍
+从
+仏
+仑
+仓
+仔
+仕
+他
+仗
+付
+仙
+仝
+仞
+仟
+代
+令
+以
+仨
+仪
+们
+仮
+仰
+仲
+件
+价
+任
+份
+仿
+企
+伉
+伊
+伍
+伎
+伏
+伐
+休
+伕
+众
+优
+伙
+会
+伝
+伞
+伟
+传
+伢
+伤
+伦
+伪
+伫
+伯
+估
+伴
+伶
+伸
+伺
+似
+伽
+佃
+但
+佇
+佈
+位
+低
+住
+佐
+佑
+体
+佔
+何
+佗
+佘
+余
+佚
+佛
+作
+佝
+佞
+佟
+你
+佢
+佣
+佤
+佥
+佩
+佬
+佯
+佰
+佳
+併
+佶
+佻
+佼
+使
+侃
+侄
+來
+侈
+例
+侍
+侏
+侑
+侖
+侗
+供
+依
+侠
+価
+侣
+侥
+侦
+侧
+侨
+侬
+侮
+侯
+侵
+侶
+侷
+便
+係
+促
+俄
+俊
+俎
+俏
+俐
+俑
+俗
+俘
+俚
+保
+俞
+俟
+俠
+信
+俨
+俩
+俪
+俬
+俭
+修
+俯
+俱
+俳
+俸
+俺
+俾
+倆
+倉
+個
+倌
+倍
+倏
+們
+倒
+倔
+倖
+倘
+候
+倚
+倜
+借
+倡
+値
+倦
+倩
+倪
+倫
+倬
+倭
+倶
+债
+值
+倾
+偃
+假
+偈
+偉
+偌
+偎
+偏
+偕
+做
+停
+健
+側
+偵
+偶
+偷
+偻
+偽
+偿
+傀
+傅
+傍
+傑
+傘
+備
+傚
+傢
+傣
+傥
+储
+傩
+催
+傭
+傲
+傳
+債
+傷
+傻
+傾
+僅
+働
+像
+僑
+僕
+僖
+僚
+僥
+僧
+僭
+僮
+僱
+僵
+價
+僻
+儀
+儂
+億
+儆
+儉
+儋
+儒
+儕
+儘
+償
+儡
+優
+儲
+儷
+儼
+儿
+兀
+允
+元
+兄
+充
+兆
+兇
+先
+光
+克
+兌
+免
+児
+兑
+兒
+兔
+兖
+党
+兜
+兢
+入
+內
+全
+兩
+八
+公
+六
+兮
+兰
+共
+兲
+关
+兴
+兵
+其
+具
+典
+兹
+养
+兼
+兽
+冀
+内
+円
+冇
+冈
+冉
+冊
+册
+再
+冏
+冒
+冕
+冗
+写
+军
+农
+冠
+冢
+冤
+冥
+冨
+冪
+冬
+冯
+冰
+冲
+决
+况
+冶
+冷
+冻
+冼
+冽
+冾
+净
+凄
+准
+凇
+凈
+凉
+凋
+凌
+凍
+减
+凑
+凛
+凜
+凝
+几
+凡
+凤
+処
+凪
+凭
+凯
+凰
+凱
+凳
+凶
+凸
+凹
+出
+击
+函
+凿
+刀
+刁
+刃
+分
+切
+刈
+刊
+刍
+刎
+刑
+划
+列
+刘
+则
+刚
+创
+初
+删
+判
+別
+刨
+利
+刪
+别
+刮
+到
+制
+刷
+券
+刹
+刺
+刻
+刽
+剁
+剂
+剃
+則
+剉
+削
+剋
+剌
+前
+剎
+剐
+剑
+剔
+剖
+剛
+剜
+剝
+剣
+剤
+剥
+剧
+剩
+剪
+副
+割
+創
+剷
+剽
+剿
+劃
+劇
+劈
+劉
+劊
+劍
+劏
+劑
+力
+劝
+办
+功
+加
+务
+劣
+动
+助
+努
+劫
+劭
+励
+劲
+劳
+労
+劵
+効
+劾
+势
+勁
+勃
+勇
+勉
+勋
+勐
+勒
+動
+勖
+勘
+務
+勛
+勝
+勞
+募
+勢
+勤
+勧
+勳
+勵
+勸
+勺
+勻
+勾
+勿
+匀
+包
+匆
+匈
+匍
+匐
+匕
+化
+北
+匙
+匝
+匠
+匡
+匣
+匪
+匮
+匯
+匱
+匹
+区
+医
+匾
+匿
+區
+十
+千
+卅
+升
+午
+卉
+半
+卍
+华
+协
+卑
+卒
+卓
+協
+单
+卖
+南
+単
+博
+卜
+卞
+卟
+占
+卡
+卢
+卤
+卦
+卧
+卫
+卮
+卯
+印
+危
+即
+却
+卵
+卷
+卸
+卻
+卿
+厂
+厄
+厅
+历
+厉
+压
+厌
+厕
+厘
+厚
+厝
+原
+厢
+厥
+厦
+厨
+厩
+厭
+厮
+厲
+厳
+去
+县
+叁
+参
+參
+又
+叉
+及
+友
+双
+反
+収
+发
+叔
+取
+受
+变
+叙
+叛
+叟
+叠
+叡
+叢
+口
+古
+句
+另
+叨
+叩
+只
+叫
+召
+叭
+叮
+可
+台
+叱
+史
+右
+叵
+叶
+号
+司
+叹
+叻
+叼
+叽
+吁
+吃
+各
+吆
+合
+吉
+吊
+吋
+同
+名
+后
+吏
+吐
+向
+吒
+吓
+吕
+吖
+吗
+君
+吝
+吞
+吟
+吠
+吡
+否
+吧
+吨
+吩
+含
+听
+吭
+吮
+启
+吱
+吳
+吴
+吵
+吶
+吸
+吹
+吻
+吼
+吽
+吾
+呀
+呂
+呃
+呆
+呈
+告
+呋
+呎
+呐
+呓
+呕
+呗
+员
+呛
+呜
+呢
+呤
+呦
+周
+呱
+呲
+味
+呵
+呷
+呸
+呻
+呼
+命
+咀
+咁
+咂
+咄
+咆
+咋
+和
+咎
+咏
+咐
+咒
+咔
+咕
+咖
+咗
+咘
+咙
+咚
+咛
+咣
+咤
+咦
+咧
+咨
+咩
+咪
+咫
+咬
+咭
+咯
+咱
+咲
+咳
+咸
+咻
+咽
+咿
+哀
+品
+哂
+哄
+哆
+哇
+哈
+哉
+哋
+哌
+响
+哎
+哏
+哐
+哑
+哒
+哔
+哗
+哟
+員
+哥
+哦
+哧
+哨
+哩
+哪
+哭
+哮
+哲
+哺
+哼
+哽
+唁
+唄
+唆
+唇
+唉
+唏
+唐
+唑
+唔
+唠
+唤
+唧
+唬
+售
+唯
+唰
+唱
+唳
+唷
+唸
+唾
+啃
+啄
+商
+啉
+啊
+問
+啓
+啕
+啖
+啜
+啞
+啟
+啡
+啤
+啥
+啦
+啧
+啪
+啫
+啬
+啮
+啰
+啱
+啲
+啵
+啶
+啷
+啸
+啻
+啼
+啾
+喀
+喂
+喃
+善
+喆
+喇
+喉
+喊
+喋
+喎
+喏
+喔
+喘
+喙
+喚
+喜
+喝
+喟
+喧
+喪
+喫
+喬
+單
+喰
+喱
+喲
+喳
+喵
+営
+喷
+喹
+喺
+喻
+喽
+嗅
+嗆
+嗇
+嗎
+嗑
+嗒
+嗓
+嗔
+嗖
+嗚
+嗜
+嗝
+嗟
+嗡
+嗣
+嗤
+嗦
+嗨
+嗪
+嗬
+嗯
+嗰
+嗲
+嗳
+嗶
+嗷
+嗽
+嘀
+嘅
+嘆
+嘈
+嘉
+嘌
+嘍
+嘎
+嘔
+嘖
+嘗
+嘘
+嘚
+嘛
+嘜
+嘞
+嘟
+嘢
+嘣
+嘤
+嘧
+嘩
+嘭
+嘮
+嘯
+嘰
+嘱
+嘲
+嘴
+嘶
+嘸
+嘹
+嘻
+嘿
+噁
+噌
+噎
+噓
+噔
+噗
+噙
+噜
+噠
+噢
+噤
+器
+噩
+噪
+噬
+噱
+噴
+噶
+噸
+噹
+噻
+噼
+嚀
+嚇
+嚎
+嚏
+嚐
+嚓
+嚕
+嚟
+嚣
+嚥
+嚨
+嚮
+嚴
+嚷
+嚼
+囂
+囉
+囊
+囍
+囑
+囔
+囗
+囚
+四
+囝
+回
+囟
+因
+囡
+团
+団
+囤
+囧
+囪
+囫
+园
+困
+囱
+囲
+図
+围
+囹
+固
+国
+图
+囿
+圃
+圄
+圆
+圈
+國
+圍
+圏
+園
+圓
+圖
+團
+圜
+土
+圣
+圧
+在
+圩
+圭
+地
+圳
+场
+圻
+圾
+址
+坂
+均
+坊
+坍
+坎
+坏
+坐
+坑
+块
+坚
+坛
+坝
+坞
+坟
+坠
+坡
+坤
+坦
+坨
+坪
+坯
+坳
+坵
+坷
+垂
+垃
+垄
+型
+垒
+垚
+垛
+垠
+垢
+垣
+垦
+垩
+垫
+垭
+垮
+垵
+埂
+埃
+埋
+城
+埔
+埕
+埗
+域
+埠
+埤
+埵
+執
+埸
+培
+基
+埼
+堀
+堂
+堃
+堅
+堆
+堇
+堑
+堕
+堙
+堡
+堤
+堪
+堯
+堰
+報
+場
+堵
+堺
+堿
+塊
+塌
+塑
+塔
+塗
+塘
+塚
+塞
+塢
+塩
+填
+塬
+塭
+塵
+塾
+墀
+境
+墅
+墉
+墊
+墒
+墓
+増
+墘
+墙
+墜
+增
+墟
+墨
+墩
+墮
+墳
+墻
+墾
+壁
+壅
+壆
+壇
+壊
+壑
+壓
+壕
+壘
+壞
+壟
+壢
+壤
+壩
+士
+壬
+壮
+壯
+声
+売
+壳
+壶
+壹
+壺
+壽
+处
+备
+変
+复
+夏
+夔
+夕
+外
+夙
+多
+夜
+够
+夠
+夢
+夥
+大
+天
+太
+夫
+夭
+央
+夯
+失
+头
+夷
+夸
+夹
+夺
+夾
+奂
+奄
+奇
+奈
+奉
+奋
+奎
+奏
+奐
+契
+奔
+奕
+奖
+套
+奘
+奚
+奠
+奢
+奥
+奧
+奪
+奬
+奮
+女
+奴
+奶
+奸
+她
+好
+如
+妃
+妄
+妆
+妇
+妈
+妊
+妍
+妒
+妓
+妖
+妘
+妙
+妝
+妞
+妣
+妤
+妥
+妨
+妩
+妪
+妮
+妲
+妳
+妹
+妻
+妾
+姆
+姉
+姊
+始
+姍
+姐
+姑
+姒
+姓
+委
+姗
+姚
+姜
+姝
+姣
+姥
+姦
+姨
+姪
+姫
+姬
+姹
+姻
+姿
+威
+娃
+娄
+娅
+娆
+娇
+娉
+娑
+娓
+娘
+娛
+娜
+娟
+娠
+娣
+娥
+娩
+娱
+娲
+娴
+娶
+娼
+婀
+婁
+婆
+婉
+婊
+婕
+婚
+婢
+婦
+婧
+婪
+婭
+婴
+婵
+婶
+婷
+婺
+婿
+媒
+媚
+媛
+媞
+媧
+媲
+媳
+媽
+媾
+嫁
+嫂
+嫉
+嫌
+嫑
+嫔
+嫖
+嫘
+嫚
+嫡
+嫣
+嫦
+嫩
+嫲
+嫵
+嫻
+嬅
+嬉
+嬌
+嬗
+嬛
+嬢
+嬤
+嬪
+嬰
+嬴
+嬷
+嬸
+嬿
+孀
+孃
+子
+孑
+孔
+孕
+孖
+字
+存
+孙
+孚
+孛
+孜
+孝
+孟
+孢
+季
+孤
+学
+孩
+孪
+孫
+孬
+孰
+孱
+孳
+孵
+學
+孺
+孽
+孿
+宁
+它
+宅
+宇
+守
+安
+宋
+完
+宏
+宓
+宕
+宗
+官
+宙
+定
+宛
+宜
+宝
+实
+実
+宠
+审
+客
+宣
+室
+宥
+宦
+宪
+宫
+宮
+宰
+害
+宴
+宵
+家
+宸
+容
+宽
+宾
+宿
+寂
+寄
+寅
+密
+寇
+富
+寐
+寒
+寓
+寛
+寝
+寞
+察
+寡
+寢
+寥
+實
+寧
+寨
+審
+寫
+寬
+寮
+寰
+寵
+寶
+寸
+对
+寺
+寻
+导
+対
+寿
+封
+専
+射
+将
+將
+專
+尉
+尊
+尋
+對
+導
+小
+少
+尔
+尕
+尖
+尘
+尚
+尝
+尤
+尧
+尬
+就
+尴
+尷
+尸
+尹
+尺
+尻
+尼
+尽
+尾
+尿
+局
+屁
+层
+屄
+居
+屆
+屈
+屉
+届
+屋
+屌
+屍
+屎
+屏
+屐
+屑
+展
+屜
+属
+屠
+屡
+屢
+層
+履
+屬
+屯
+山
+屹
+屿
+岀
+岁
+岂
+岌
+岐
+岑
+岔
+岖
+岗
+岘
+岙
+岚
+岛
+岡
+岩
+岫
+岬
+岭
+岱
+岳
+岷
+岸
+峇
+峋
+峒
+峙
+峡
+峤
+峥
+峦
+峨
+峪
+峭
+峯
+峰
+峴
+島
+峻
+峽
+崁
+崂
+崆
+崇
+崎
+崑
+崔
+崖
+崗
+崙
+崛
+崧
+崩
+崭
+崴
+崽
+嵇
+嵊
+嵋
+嵌
+嵐
+嵘
+嵩
+嵬
+嵯
+嶂
+嶄
+嶇
+嶋
+嶙
+嶺
+嶼
+嶽
+巅
+巍
+巒
+巔
+巖
+川
+州
+巡
+巢
+工
+左
+巧
+巨
+巩
+巫
+差
+己
+已
+巳
+巴
+巷
+巻
+巽
+巾
+巿
+币
+市
+布
+帅
+帆
+师
+希
+帐
+帑
+帕
+帖
+帘
+帚
+帛
+帜
+帝
+帥
+带
+帧
+師
+席
+帮
+帯
+帰
+帳
+帶
+帷
+常
+帼
+帽
+幀
+幂
+幄
+幅
+幌
+幔
+幕
+幟
+幡
+幢
+幣
+幫
+干
+平
+年
+并
+幸
+幹
+幺
+幻
+幼
+幽
+幾
+广
+庁
+広
+庄
+庆
+庇
+床
+序
+庐
+库
+应
+底
+庖
+店
+庙
+庚
+府
+庞
+废
+庠
+度
+座
+庫
+庭
+庵
+庶
+康
+庸
+庹
+庾
+廁
+廂
+廃
+廈
+廉
+廊
+廓
+廖
+廚
+廝
+廟
+廠
+廢
+廣
+廬
+廳
+延
+廷
+建
+廿
+开
+弁
+异
+弃
+弄
+弈
+弊
+弋
+式
+弑
+弒
+弓
+弔
+引
+弗
+弘
+弛
+弟
+张
+弥
+弦
+弧
+弩
+弭
+弯
+弱
+張
+強
+弹
+强
+弼
+弾
+彅
+彆
+彈
+彌
+彎
+归
+当
+录
+彗
+彙
+彝
+形
+彤
+彥
+彦
+彧
+彩
+彪
+彫
+彬
+彭
+彰
+影
+彷
+役
+彻
+彼
+彿
+往
+征
+径
+待
+徇
+很
+徉
+徊
+律
+後
+徐
+徑
+徒
+従
+徕
+得
+徘
+徙
+徜
+從
+徠
+御
+徨
+復
+循
+徬
+微
+徳
+徴
+徵
+德
+徹
+徼
+徽
+心
+必
+忆
+忌
+忍
+忏
+忐
+忑
+忒
+忖
+志
+忘
+忙
+応
+忠
+忡
+忤
+忧
+忪
+快
+忱
+念
+忻
+忽
+忿
+怀
+态
+怂
+怅
+怆
+怎
+怏
+怒
+怔
+怕
+怖
+怙
+怜
+思
+怠
+怡
+急
+怦
+性
+怨
+怪
+怯
+怵
+总
+怼
+恁
+恃
+恆
+恋
+恍
+恐
+恒
+恕
+恙
+恚
+恢
+恣
+恤
+恥
+恨
+恩
+恪
+恫
+恬
+恭
+息
+恰
+恳
+恵
+恶
+恸
+恺
+恻
+恼
+恿
+悄
+悅
+悉
+悌
+悍
+悔
+悖
+悚
+悟
+悠
+患
+悦
+您
+悩
+悪
+悬
+悯
+悱
+悲
+悴
+悵
+悶
+悸
+悻
+悼
+悽
+情
+惆
+惇
+惊
+惋
+惑
+惕
+惘
+惚
+惜
+惟
+惠
+惡
+惦
+惧
+惨
+惩
+惫
+惬
+惭
+惮
+惯
+惰
+惱
+想
+惴
+惶
+惹
+惺
+愁
+愆
+愈
+愉
+愍
+意
+愕
+愚
+愛
+愜
+感
+愣
+愤
+愧
+愫
+愷
+愿
+慄
+慈
+態
+慌
+慎
+慑
+慕
+慘
+慚
+慟
+慢
+慣
+慧
+慨
+慫
+慮
+慰
+慳
+慵
+慶
+慷
+慾
+憂
+憊
+憋
+憎
+憐
+憑
+憔
+憚
+憤
+憧
+憨
+憩
+憫
+憬
+憲
+憶
+憾
+懂
+懇
+懈
+應
+懊
+懋
+懑
+懒
+懦
+懲
+懵
+懶
+懷
+懸
+懺
+懼
+懾
+懿
+戀
+戈
+戊
+戌
+戍
+戎
+戏
+成
+我
+戒
+戕
+或
+战
+戚
+戛
+戟
+戡
+戦
+截
+戬
+戮
+戰
+戲
+戳
+戴
+戶
+户
+戸
+戻
+戾
+房
+所
+扁
+扇
+扈
+扉
+手
+才
+扎
+扑
+扒
+打
+扔
+払
+托
+扛
+扣
+扦
+执
+扩
+扪
+扫
+扬
+扭
+扮
+扯
+扰
+扱
+扳
+扶
+批
+扼
+找
+承
+技
+抄
+抉
+把
+抑
+抒
+抓
+投
+抖
+抗
+折
+抚
+抛
+抜
+択
+抟
+抠
+抡
+抢
+护
+报
+抨
+披
+抬
+抱
+抵
+抹
+押
+抽
+抿
+拂
+拄
+担
+拆
+拇
+拈
+拉
+拋
+拌
+拍
+拎
+拐
+拒
+拓
+拔
+拖
+拗
+拘
+拙
+拚
+招
+拜
+拟
+拡
+拢
+拣
+拥
+拦
+拧
+拨
+择
+括
+拭
+拮
+拯
+拱
+拳
+拴
+拷
+拼
+拽
+拾
+拿
+持
+挂
+指
+挈
+按
+挎
+挑
+挖
+挙
+挚
+挛
+挝
+挞
+挟
+挠
+挡
+挣
+挤
+挥
+挨
+挪
+挫
+振
+挲
+挹
+挺
+挽
+挾
+捂
+捅
+捆
+捉
+捋
+捌
+捍
+捎
+捏
+捐
+捕
+捞
+损
+捡
+换
+捣
+捧
+捨
+捩
+据
+捱
+捲
+捶
+捷
+捺
+捻
+掀
+掂
+掃
+掇
+授
+掉
+掌
+掏
+掐
+排
+掖
+掘
+掙
+掛
+掠
+採
+探
+掣
+接
+控
+推
+掩
+措
+掬
+掰
+掲
+掳
+掴
+掷
+掸
+掺
+揀
+揃
+揄
+揆
+揉
+揍
+描
+提
+插
+揖
+揚
+換
+握
+揣
+揩
+揪
+揭
+揮
+援
+揶
+揸
+揹
+揽
+搀
+搁
+搂
+搅
+損
+搏
+搐
+搓
+搔
+搖
+搗
+搜
+搞
+搡
+搪
+搬
+搭
+搵
+搶
+携
+搽
+摀
+摁
+摄
+摆
+摇
+摈
+摊
+摒
+摔
+摘
+摞
+摟
+摧
+摩
+摯
+摳
+摸
+摹
+摺
+摻
+撂
+撃
+撅
+撇
+撈
+撐
+撑
+撒
+撓
+撕
+撚
+撞
+撤
+撥
+撩
+撫
+撬
+播
+撮
+撰
+撲
+撵
+撷
+撸
+撻
+撼
+撿
+擀
+擁
+擂
+擄
+擅
+擇
+擊
+擋
+操
+擎
+擒
+擔
+擘
+據
+擞
+擠
+擡
+擢
+擦
+擬
+擰
+擱
+擲
+擴
+擷
+擺
+擼
+擾
+攀
+攏
+攒
+攔
+攘
+攙
+攜
+攝
+攞
+攢
+攣
+攤
+攥
+攪
+攫
+攬
+支
+收
+攸
+改
+攻
+放
+政
+故
+效
+敌
+敍
+敎
+敏
+救
+敕
+敖
+敗
+敘
+教
+敛
+敝
+敞
+敢
+散
+敦
+敬
+数
+敲
+整
+敵
+敷
+數
+斂
+斃
+文
+斋
+斌
+斎
+斐
+斑
+斓
+斗
+料
+斛
+斜
+斟
+斡
+斤
+斥
+斧
+斩
+斫
+斬
+断
+斯
+新
+斷
+方
+於
+施
+旁
+旃
+旅
+旋
+旌
+旎
+族
+旖
+旗
+无
+既
+日
+旦
+旧
+旨
+早
+旬
+旭
+旮
+旱
+时
+旷
+旺
+旻
+昀
+昂
+昆
+昇
+昉
+昊
+昌
+明
+昏
+易
+昔
+昕
+昙
+星
+映
+春
+昧
+昨
+昭
+是
+昱
+昴
+昵
+昶
+昼
+显
+晁
+時
+晃
+晉
+晋
+晌
+晏
+晒
+晓
+晔
+晕
+晖
+晗
+晚
+晝
+晞
+晟
+晤
+晦
+晨
+晩
+普
+景
+晰
+晴
+晶
+晷
+智
+晾
+暂
+暄
+暇
+暈
+暉
+暌
+暐
+暑
+暖
+暗
+暝
+暢
+暧
+暨
+暫
+暮
+暱
+暴
+暸
+暹
+曄
+曆
+曇
+曉
+曖
+曙
+曜
+曝
+曠
+曦
+曬
+曰
+曲
+曳
+更
+書
+曹
+曼
+曾
+替
+最
+會
+月
+有
+朋
+服
+朐
+朔
+朕
+朗
+望
+朝
+期
+朦
+朧
+木
+未
+末
+本
+札
+朮
+术
+朱
+朴
+朵
+机
+朽
+杀
+杂
+权
+杆
+杈
+杉
+李
+杏
+材
+村
+杓
+杖
+杜
+杞
+束
+杠
+条
+来
+杨
+杭
+杯
+杰
+東
+杳
+杵
+杷
+杼
+松
+板
+极
+构
+枇
+枉
+枋
+析
+枕
+林
+枚
+果
+枝
+枢
+枣
+枪
+枫
+枭
+枯
+枰
+枱
+枳
+架
+枷
+枸
+柄
+柏
+某
+柑
+柒
+染
+柔
+柘
+柚
+柜
+柞
+柠
+柢
+查
+柩
+柬
+柯
+柱
+柳
+柴
+柵
+査
+柿
+栀
+栃
+栄
+栅
+标
+栈
+栉
+栋
+栎
+栏
+树
+栓
+栖
+栗
+校
+栩
+株
+样
+核
+根
+格
+栽
+栾
+桀
+桁
+桂
+桃
+桅
+框
+案
+桉
+桌
+桎
+桐
+桑
+桓
+桔
+桜
+桠
+桡
+桢
+档
+桥
+桦
+桧
+桨
+桩
+桶
+桿
+梁
+梅
+梆
+梏
+梓
+梗
+條
+梟
+梢
+梦
+梧
+梨
+梭
+梯
+械
+梳
+梵
+梶
+检
+棂
+棄
+棉
+棋
+棍
+棒
+棕
+棗
+棘
+棚
+棟
+棠
+棣
+棧
+森
+棱
+棲
+棵
+棹
+棺
+椁
+椅
+椋
+植
+椎
+椒
+検
+椪
+椭
+椰
+椹
+椽
+椿
+楂
+楊
+楓
+楔
+楚
+楝
+楞
+楠
+楣
+楨
+楫
+業
+楮
+極
+楷
+楸
+楹
+楼
+楽
+概
+榄
+榆
+榈
+榉
+榔
+榕
+榖
+榛
+榜
+榨
+榫
+榭
+榮
+榱
+榴
+榷
+榻
+槁
+槃
+構
+槌
+槍
+槎
+槐
+槓
+様
+槛
+槟
+槤
+槭
+槲
+槳
+槻
+槽
+槿
+樁
+樂
+樊
+樑
+樓
+標
+樞
+樟
+模
+樣
+権
+横
+樫
+樯
+樱
+樵
+樸
+樹
+樺
+樽
+樾
+橄
+橇
+橋
+橐
+橘
+橙
+機
+橡
+橢
+橫
+橱
+橹
+橼
+檀
+檄
+檎
+檐
+檔
+檗
+檜
+檢
+檬
+檯
+檳
+檸
+檻
+櫃
+櫚
+櫛
+櫥
+櫸
+櫻
+欄
+權
+欒
+欖
+欠
+次
+欢
+欣
+欧
+欲
+欸
+欺
+欽
+款
+歆
+歇
+歉
+歌
+歎
+歐
+歓
+歙
+歛
+歡
+止
+正
+此
+步
+武
+歧
+歩
+歪
+歯
+歲
+歳
+歴
+歷
+歸
+歹
+死
+歼
+殁
+殃
+殆
+殇
+殉
+殊
+残
+殒
+殓
+殖
+殘
+殞
+殡
+殤
+殭
+殯
+殲
+殴
+段
+殷
+殺
+殼
+殿
+毀
+毁
+毂
+毅
+毆
+毋
+母
+毎
+每
+毒
+毓
+比
+毕
+毗
+毘
+毙
+毛
+毡
+毫
+毯
+毽
+氈
+氏
+氐
+民
+氓
+气
+氖
+気
+氙
+氛
+氟
+氡
+氢
+氣
+氤
+氦
+氧
+氨
+氪
+氫
+氮
+氯
+氰
+氲
+水
+氷
+永
+氹
+氾
+汀
+汁
+求
+汆
+汇
+汉
+汎
+汐
+汕
+汗
+汙
+汛
+汝
+汞
+江
+池
+污
+汤
+汨
+汩
+汪
+汰
+汲
+汴
+汶
+汹
+決
+汽
+汾
+沁
+沂
+沃
+沅
+沈
+沉
+沌
+沏
+沐
+沒
+沓
+沖
+沙
+沛
+沟
+没
+沢
+沣
+沥
+沦
+沧
+沪
+沫
+沭
+沮
+沱
+河
+沸
+油
+治
+沼
+沽
+沾
+沿
+況
+泄
+泉
+泊
+泌
+泓
+法
+泗
+泛
+泞
+泠
+泡
+波
+泣
+泥
+注
+泪
+泫
+泮
+泯
+泰
+泱
+泳
+泵
+泷
+泸
+泻
+泼
+泽
+泾
+洁
+洄
+洋
+洒
+洗
+洙
+洛
+洞
+津
+洩
+洪
+洮
+洱
+洲
+洵
+洶
+洸
+洹
+活
+洼
+洽
+派
+流
+浃
+浄
+浅
+浆
+浇
+浊
+测
+济
+浏
+浑
+浒
+浓
+浔
+浙
+浚
+浜
+浣
+浦
+浩
+浪
+浬
+浮
+浯
+浴
+海
+浸
+涂
+涅
+涇
+消
+涉
+涌
+涎
+涓
+涔
+涕
+涙
+涛
+涝
+涞
+涟
+涠
+涡
+涣
+涤
+润
+涧
+涨
+涩
+涪
+涮
+涯
+液
+涵
+涸
+涼
+涿
+淀
+淄
+淅
+淆
+淇
+淋
+淌
+淑
+淒
+淖
+淘
+淙
+淚
+淞
+淡
+淤
+淦
+淨
+淩
+淪
+淫
+淬
+淮
+深
+淳
+淵
+混
+淹
+淺
+添
+淼
+清
+済
+渉
+渊
+渋
+渍
+渎
+渐
+渔
+渗
+渙
+渚
+減
+渝
+渠
+渡
+渣
+渤
+渥
+渦
+温
+測
+渭
+港
+渲
+渴
+游
+渺
+渾
+湃
+湄
+湊
+湍
+湖
+湘
+湛
+湟
+湧
+湫
+湮
+湯
+湳
+湾
+湿
+満
+溃
+溅
+溉
+溏
+源
+準
+溜
+溝
+溟
+溢
+溥
+溧
+溪
+溫
+溯
+溱
+溴
+溶
+溺
+溼
+滁
+滂
+滄
+滅
+滇
+滋
+滌
+滑
+滓
+滔
+滕
+滙
+滚
+滝
+滞
+滟
+满
+滢
+滤
+滥
+滦
+滨
+滩
+滬
+滯
+滲
+滴
+滷
+滸
+滾
+滿
+漁
+漂
+漆
+漉
+漏
+漓
+演
+漕
+漠
+漢
+漣
+漩
+漪
+漫
+漬
+漯
+漱
+漲
+漳
+漸
+漾
+漿
+潆
+潇
+潋
+潍
+潑
+潔
+潘
+潛
+潜
+潞
+潟
+潢
+潤
+潦
+潧
+潭
+潮
+潰
+潴
+潸
+潺
+潼
+澀
+澄
+澆
+澈
+澍
+澎
+澗
+澜
+澡
+澤
+澧
+澱
+澳
+澹
+激
+濁
+濂
+濃
+濑
+濒
+濕
+濘
+濛
+濟
+濠
+濡
+濤
+濫
+濬
+濮
+濯
+濱
+濺
+濾
+瀅
+瀆
+瀉
+瀋
+瀏
+瀑
+瀕
+瀘
+瀚
+瀛
+瀝
+瀞
+瀟
+瀧
+瀨
+瀬
+瀰
+瀾
+灌
+灏
+灑
+灘
+灝
+灞
+灣
+火
+灬
+灭
+灯
+灰
+灵
+灶
+灸
+灼
+災
+灾
+灿
+炀
+炁
+炅
+炉
+炊
+炎
+炒
+炔
+炕
+炖
+炙
+炜
+炫
+炬
+炭
+炮
+炯
+炳
+炷
+炸
+点
+為
+炼
+炽
+烁
+烂
+烃
+烈
+烊
+烏
+烘
+烙
+烛
+烟
+烤
+烦
+烧
+烨
+烩
+烫
+烬
+热
+烯
+烷
+烹
+烽
+焉
+焊
+焕
+焖
+焗
+焘
+焙
+焚
+焜
+無
+焦
+焯
+焰
+焱
+然
+焼
+煅
+煉
+煊
+煌
+煎
+煒
+煖
+煙
+煜
+煞
+煤
+煥
+煦
+照
+煨
+煩
+煮
+煲
+煸
+煽
+熄
+熊
+熏
+熒
+熔
+熙
+熟
+熠
+熨
+熬
+熱
+熵
+熹
+熾
+燁
+燃
+燄
+燈
+燉
+燊
+燎
+燒
+燔
+燕
+燙
+燜
+營
+燥
+燦
+燧
+燭
+燮
+燴
+燻
+燼
+燿
+爆
+爍
+爐
+爛
+爪
+爬
+爭
+爰
+爱
+爲
+爵
+父
+爷
+爸
+爹
+爺
+爻
+爽
+爾
+牆
+片
+版
+牌
+牍
+牒
+牙
+牛
+牝
+牟
+牠
+牡
+牢
+牦
+牧
+物
+牯
+牲
+牴
+牵
+特
+牺
+牽
+犀
+犁
+犄
+犊
+犍
+犒
+犢
+犧
+犬
+犯
+状
+犷
+犸
+犹
+狀
+狂
+狄
+狈
+狎
+狐
+狒
+狗
+狙
+狞
+狠
+狡
+狩
+独
+狭
+狮
+狰
+狱
+狸
+狹
+狼
+狽
+猎
+猕
+猖
+猗
+猙
+猛
+猜
+猝
+猥
+猩
+猪
+猫
+猬
+献
+猴
+猶
+猷
+猾
+猿
+獄
+獅
+獎
+獐
+獒
+獗
+獠
+獣
+獨
+獭
+獰
+獲
+獵
+獷
+獸
+獺
+獻
+獼
+獾
+玄
+率
+玉
+王
+玑
+玖
+玛
+玟
+玠
+玥
+玩
+玫
+玮
+环
+现
+玲
+玳
+玷
+玺
+玻
+珀
+珂
+珅
+珈
+珉
+珊
+珍
+珏
+珐
+珑
+珙
+珞
+珠
+珣
+珥
+珩
+珪
+班
+珮
+珲
+珺
+現
+球
+琅
+理
+琇
+琉
+琊
+琍
+琏
+琐
+琛
+琢
+琥
+琦
+琨
+琪
+琬
+琮
+琰
+琲
+琳
+琴
+琵
+琶
+琺
+琼
+瑀
+瑁
+瑄
+瑋
+瑕
+瑗
+瑙
+瑚
+瑛
+瑜
+瑞
+瑟
+瑠
+瑣
+瑤
+瑩
+瑪
+瑯
+瑰
+瑶
+瑾
+璀
+璁
+璃
+璇
+璉
+璋
+璎
+璐
+璜
+璞
+璟
+璧
+璨
+環
+璽
+璿
+瓊
+瓏
+瓒
+瓜
+瓢
+瓣
+瓤
+瓦
+瓮
+瓯
+瓴
+瓶
+瓷
+甄
+甌
+甕
+甘
+甙
+甚
+甜
+生
+產
+産
+甥
+甦
+用
+甩
+甫
+甬
+甭
+甯
+田
+由
+甲
+申
+电
+男
+甸
+町
+画
+甾
+畀
+畅
+界
+畏
+畑
+畔
+留
+畜
+畝
+畢
+略
+畦
+番
+畫
+異
+畲
+畳
+畴
+當
+畸
+畹
+畿
+疆
+疇
+疊
+疏
+疑
+疔
+疖
+疗
+疙
+疚
+疝
+疟
+疡
+疣
+疤
+疥
+疫
+疮
+疯
+疱
+疲
+疳
+疵
+疸
+疹
+疼
+疽
+疾
+痂
+病
+症
+痈
+痉
+痊
+痍
+痒
+痔
+痕
+痘
+痙
+痛
+痞
+痠
+痢
+痣
+痤
+痧
+痨
+痪
+痫
+痰
+痱
+痴
+痹
+痺
+痼
+痿
+瘀
+瘁
+瘋
+瘍
+瘓
+瘘
+瘙
+瘟
+瘠
+瘡
+瘢
+瘤
+瘦
+瘧
+瘩
+瘪
+瘫
+瘴
+瘸
+瘾
+療
+癇
+癌
+癒
+癖
+癜
+癞
+癡
+癢
+癣
+癥
+癫
+癬
+癮
+癱
+癲
+癸
+発
+登
+發
+白
+百
+皂
+的
+皆
+皇
+皈
+皋
+皎
+皑
+皓
+皖
+皙
+皚
+皮
+皰
+皱
+皴
+皺
+皿
+盂
+盃
+盅
+盆
+盈
+益
+盎
+盏
+盐
+监
+盒
+盔
+盖
+盗
+盘
+盛
+盜
+盞
+盟
+盡
+監
+盤
+盥
+盧
+盪
+目
+盯
+盱
+盲
+直
+相
+盹
+盼
+盾
+省
+眈
+眉
+看
+県
+眙
+眞
+真
+眠
+眦
+眨
+眩
+眯
+眶
+眷
+眸
+眺
+眼
+眾
+着
+睁
+睇
+睏
+睐
+睑
+睛
+睜
+睞
+睡
+睢
+督
+睥
+睦
+睨
+睪
+睫
+睬
+睹
+睽
+睾
+睿
+瞄
+瞅
+瞇
+瞋
+瞌
+瞎
+瞑
+瞒
+瞓
+瞞
+瞟
+瞠
+瞥
+瞧
+瞩
+瞪
+瞬
+瞭
+瞰
+瞳
+瞻
+瞼
+瞿
+矇
+矍
+矗
+矚
+矛
+矜
+矢
+矣
+知
+矩
+矫
+短
+矮
+矯
+石
+矶
+矽
+矾
+矿
+码
+砂
+砌
+砍
+砒
+研
+砖
+砗
+砚
+砝
+砣
+砥
+砧
+砭
+砰
+砲
+破
+砷
+砸
+砺
+砼
+砾
+础
+硅
+硐
+硒
+硕
+硝
+硫
+硬
+确
+硯
+硼
+碁
+碇
+碉
+碌
+碍
+碎
+碑
+碓
+碗
+碘
+碚
+碛
+碟
+碣
+碧
+碩
+碰
+碱
+碳
+碴
+確
+碼
+碾
+磁
+磅
+磊
+磋
+磐
+磕
+磚
+磡
+磨
+磬
+磯
+磲
+磷
+磺
+礁
+礎
+礙
+礡
+礦
+礪
+礫
+礴
+示
+礼
+社
+祀
+祁
+祂
+祇
+祈
+祉
+祎
+祐
+祕
+祖
+祗
+祚
+祛
+祜
+祝
+神
+祟
+祠
+祢
+祥
+票
+祭
+祯
+祷
+祸
+祺
+祿
+禀
+禁
+禄
+禅
+禍
+禎
+福
+禛
+禦
+禧
+禪
+禮
+禱
+禹
+禺
+离
+禽
+禾
+禿
+秀
+私
+秃
+秆
+秉
+秋
+种
+科
+秒
+秘
+租
+秣
+秤
+秦
+秧
+秩
+秭
+积
+称
+秸
+移
+秽
+稀
+稅
+程
+稍
+税
+稔
+稗
+稚
+稜
+稞
+稟
+稠
+稣
+種
+稱
+稲
+稳
+稷
+稹
+稻
+稼
+稽
+稿
+穀
+穂
+穆
+穌
+積
+穎
+穗
+穢
+穩
+穫
+穴
+究
+穷
+穹
+空
+穿
+突
+窃
+窄
+窈
+窍
+窑
+窒
+窓
+窕
+窖
+窗
+窘
+窜
+窝
+窟
+窠
+窥
+窦
+窨
+窩
+窪
+窮
+窯
+窺
+窿
+竄
+竅
+竇
+竊
+立
+竖
+站
+竜
+竞
+竟
+章
+竣
+童
+竭
+端
+競
+竹
+竺
+竽
+竿
+笃
+笆
+笈
+笋
+笏
+笑
+笔
+笙
+笛
+笞
+笠
+符
+笨
+第
+笹
+笺
+笼
+筆
+等
+筊
+筋
+筍
+筏
+筐
+筑
+筒
+答
+策
+筛
+筝
+筠
+筱
+筲
+筵
+筷
+筹
+签
+简
+箇
+箋
+箍
+箏
+箐
+箔
+箕
+算
+箝
+管
+箩
+箫
+箭
+箱
+箴
+箸
+節
+篁
+範
+篆
+篇
+築
+篑
+篓
+篙
+篝
+篠
+篡
+篤
+篩
+篪
+篮
+篱
+篷
+簇
+簌
+簍
+簡
+簦
+簧
+簪
+簫
+簷
+簸
+簽
+簾
+簿
+籁
+籃
+籌
+籍
+籐
+籟
+籠
+籤
+籬
+籮
+籲
+米
+类
+籼
+籽
+粄
+粉
+粑
+粒
+粕
+粗
+粘
+粟
+粤
+粥
+粧
+粪
+粮
+粱
+粲
+粳
+粵
+粹
+粼
+粽
+精
+粿
+糅
+糊
+糍
+糕
+糖
+糗
+糙
+糜
+糞
+糟
+糠
+糧
+糬
+糯
+糰
+糸
+系
+糾
+紀
+紂
+約
+紅
+紉
+紊
+紋
+納
+紐
+紓
+純
+紗
+紘
+紙
+級
+紛
+紜
+素
+紡
+索
+紧
+紫
+紮
+累
+細
+紳
+紹
+紺
+終
+絃
+組
+絆
+経
+結
+絕
+絞
+絡
+絢
+給
+絨
+絮
+統
+絲
+絳
+絵
+絶
+絹
+綁
+綏
+綑
+經
+継
+続
+綜
+綠
+綢
+綦
+綫
+綬
+維
+綱
+網
+綴
+綵
+綸
+綺
+綻
+綽
+綾
+綿
+緊
+緋
+総
+緑
+緒
+緘
+線
+緝
+緞
+締
+緣
+編
+緩
+緬
+緯
+練
+緹
+緻
+縁
+縄
+縈
+縛
+縝
+縣
+縫
+縮
+縱
+縴
+縷
+總
+績
+繁
+繃
+繆
+繇
+繋
+織
+繕
+繚
+繞
+繡
+繩
+繪
+繫
+繭
+繳
+繹
+繼
+繽
+纂
+續
+纍
+纏
+纓
+纔
+纖
+纜
+纠
+红
+纣
+纤
+约
+级
+纨
+纪
+纫
+纬
+纭
+纯
+纰
+纱
+纲
+纳
+纵
+纶
+纷
+纸
+纹
+纺
+纽
+纾
+线
+绀
+练
+组
+绅
+细
+织
+终
+绊
+绍
+绎
+经
+绑
+绒
+结
+绔
+绕
+绘
+给
+绚
+绛
+络
+绝
+绞
+统
+绡
+绢
+绣
+绥
+绦
+继
+绩
+绪
+绫
+续
+绮
+绯
+绰
+绳
+维
+绵
+绶
+绷
+绸
+绻
+综
+绽
+绾
+绿
+缀
+缄
+缅
+缆
+缇
+缈
+缉
+缎
+缓
+缔
+缕
+编
+缘
+缙
+缚
+缜
+缝
+缠
+缢
+缤
+缥
+缨
+缩
+缪
+缭
+缮
+缰
+缱
+缴
+缸
+缺
+缽
+罂
+罄
+罌
+罐
+网
+罔
+罕
+罗
+罚
+罡
+罢
+罩
+罪
+置
+罰
+署
+罵
+罷
+罹
+羁
+羅
+羈
+羊
+羌
+美
+羔
+羚
+羞
+羟
+羡
+羣
+群
+羥
+羧
+羨
+義
+羯
+羲
+羸
+羹
+羽
+羿
+翁
+翅
+翊
+翌
+翎
+習
+翔
+翘
+翟
+翠
+翡
+翦
+翩
+翰
+翱
+翳
+翹
+翻
+翼
+耀
+老
+考
+耄
+者
+耆
+耋
+而
+耍
+耐
+耒
+耕
+耗
+耘
+耙
+耦
+耨
+耳
+耶
+耷
+耸
+耻
+耽
+耿
+聂
+聆
+聊
+聋
+职
+聒
+联
+聖
+聘
+聚
+聞
+聪
+聯
+聰
+聲
+聳
+聴
+聶
+職
+聽
+聾
+聿
+肃
+肄
+肅
+肆
+肇
+肉
+肋
+肌
+肏
+肓
+肖
+肘
+肚
+肛
+肝
+肠
+股
+肢
+肤
+肥
+肩
+肪
+肮
+肯
+肱
+育
+肴
+肺
+肽
+肾
+肿
+胀
+胁
+胃
+胄
+胆
+背
+胍
+胎
+胖
+胚
+胛
+胜
+胝
+胞
+胡
+胤
+胥
+胧
+胫
+胭
+胯
+胰
+胱
+胳
+胴
+胶
+胸
+胺
+能
+脂
+脅
+脆
+脇
+脈
+脉
+脊
+脍
+脏
+脐
+脑
+脓
+脖
+脘
+脚
+脛
+脣
+脩
+脫
+脯
+脱
+脲
+脳
+脸
+脹
+脾
+腆
+腈
+腊
+腋
+腌
+腎
+腐
+腑
+腓
+腔
+腕
+腥
+腦
+腩
+腫
+腭
+腮
+腰
+腱
+腳
+腴
+腸
+腹
+腺
+腻
+腼
+腾
+腿
+膀
+膈
+膊
+膏
+膑
+膘
+膚
+膛
+膜
+膝
+膠
+膦
+膨
+膩
+膳
+膺
+膻
+膽
+膾
+膿
+臀
+臂
+臃
+臆
+臉
+臊
+臍
+臓
+臘
+臟
+臣
+臥
+臧
+臨
+自
+臬
+臭
+至
+致
+臺
+臻
+臼
+臾
+舀
+舂
+舅
+舆
+與
+興
+舉
+舊
+舌
+舍
+舎
+舐
+舒
+舔
+舖
+舗
+舛
+舜
+舞
+舟
+航
+舫
+般
+舰
+舱
+舵
+舶
+舷
+舸
+船
+舺
+舾
+艇
+艋
+艘
+艙
+艦
+艮
+良
+艰
+艱
+色
+艳
+艷
+艹
+艺
+艾
+节
+芃
+芈
+芊
+芋
+芍
+芎
+芒
+芙
+芜
+芝
+芡
+芥
+芦
+芩
+芪
+芫
+芬
+芭
+芮
+芯
+花
+芳
+芷
+芸
+芹
+芻
+芽
+芾
+苁
+苄
+苇
+苋
+苍
+苏
+苑
+苒
+苓
+苔
+苕
+苗
+苛
+苜
+苞
+苟
+苡
+苣
+若
+苦
+苫
+苯
+英
+苷
+苹
+苻
+茁
+茂
+范
+茄
+茅
+茉
+茎
+茏
+茗
+茜
+茧
+茨
+茫
+茬
+茭
+茯
+茱
+茲
+茴
+茵
+茶
+茸
+茹
+茼
+荀
+荃
+荆
+草
+荊
+荏
+荐
+荒
+荔
+荖
+荘
+荚
+荞
+荟
+荠
+荡
+荣
+荤
+荥
+荧
+荨
+荪
+荫
+药
+荳
+荷
+荸
+荻
+荼
+荽
+莅
+莆
+莉
+莊
+莎
+莒
+莓
+莖
+莘
+莞
+莠
+莢
+莧
+莪
+莫
+莱
+莲
+莴
+获
+莹
+莺
+莽
+莿
+菀
+菁
+菅
+菇
+菈
+菊
+菌
+菏
+菓
+菖
+菘
+菜
+菟
+菠
+菡
+菩
+華
+菱
+菲
+菸
+菽
+萁
+萃
+萄
+萊
+萋
+萌
+萍
+萎
+萘
+萝
+萤
+营
+萦
+萧
+萨
+萩
+萬
+萱
+萵
+萸
+萼
+落
+葆
+葉
+著
+葚
+葛
+葡
+董
+葦
+葩
+葫
+葬
+葭
+葯
+葱
+葳
+葵
+葷
+葺
+蒂
+蒋
+蒐
+蒔
+蒙
+蒜
+蒞
+蒟
+蒡
+蒨
+蒲
+蒸
+蒹
+蒻
+蒼
+蒿
+蓁
+蓄
+蓆
+蓉
+蓋
+蓑
+蓓
+蓖
+蓝
+蓟
+蓦
+蓬
+蓮
+蓼
+蓿
+蔑
+蔓
+蔔
+蔗
+蔘
+蔚
+蔡
+蔣
+蔥
+蔫
+蔬
+蔭
+蔵
+蔷
+蔺
+蔻
+蔼
+蔽
+蕁
+蕃
+蕈
+蕉
+蕊
+蕎
+蕙
+蕤
+蕨
+蕩
+蕪
+蕭
+蕲
+蕴
+蕻
+蕾
+薄
+薅
+薇
+薈
+薊
+薏
+薑
+薔
+薙
+薛
+薦
+薨
+薩
+薪
+薬
+薯
+薰
+薹
+藉
+藍
+藏
+藐
+藓
+藕
+藜
+藝
+藤
+藥
+藩
+藹
+藻
+藿
+蘆
+蘇
+蘊
+蘋
+蘑
+蘚
+蘭
+蘸
+蘼
+蘿
+虎
+虏
+虐
+虑
+虔
+處
+虚
+虛
+虜
+虞
+號
+虢
+虧
+虫
+虬
+虱
+虹
+虻
+虽
+虾
+蚀
+蚁
+蚂
+蚊
+蚌
+蚓
+蚕
+蚜
+蚝
+蚣
+蚤
+蚩
+蚪
+蚯
+蚱
+蚵
+蛀
+蛆
+蛇
+蛊
+蛋
+蛎
+蛐
+蛔
+蛙
+蛛
+蛟
+蛤
+蛭
+蛮
+蛰
+蛳
+蛹
+蛻
+蛾
+蜀
+蜂
+蜃
+蜆
+蜇
+蜈
+蜊
+蜍
+蜒
+蜓
+蜕
+蜗
+蜘
+蜚
+蜜
+蜡
+蜢
+蜥
+蜱
+蜴
+蜷
+蜻
+蜿
+蝇
+蝈
+蝉
+蝌
+蝎
+蝕
+蝗
+蝙
+蝟
+蝠
+蝦
+蝨
+蝴
+蝶
+蝸
+蝼
+螂
+螃
+融
+螞
+螢
+螨
+螯
+螳
+螺
+蟀
+蟄
+蟆
+蟋
+蟎
+蟑
+蟒
+蟠
+蟬
+蟲
+蟹
+蟻
+蟾
+蠅
+蠍
+蠔
+蠕
+蠛
+蠟
+蠡
+蠢
+蠣
+蠱
+蠶
+蠹
+蠻
+血
+衄
+衅
+衆
+行
+衍
+術
+衔
+街
+衙
+衛
+衝
+衞
+衡
+衢
+衣
+补
+表
+衩
+衫
+衬
+衮
+衰
+衲
+衷
+衹
+衾
+衿
+袁
+袂
+袄
+袅
+袈
+袋
+袍
+袒
+袖
+袜
+袞
+袤
+袪
+被
+袭
+袱
+裁
+裂
+装
+裆
+裊
+裏
+裔
+裕
+裘
+裙
+補
+裝
+裟
+裡
+裤
+裨
+裱
+裳
+裴
+裸
+裹
+製
+裾
+褂
+複
+褐
+褒
+褓
+褔
+褚
+褥
+褪
+褫
+褲
+褶
+褻
+襁
+襄
+襟
+襠
+襪
+襬
+襯
+襲
+西
+要
+覃
+覆
+覇
+見
+規
+覓
+視
+覚
+覦
+覧
+親
+覬
+観
+覷
+覺
+覽
+觀
+见
+观
+规
+觅
+视
+览
+觉
+觊
+觎
+觐
+觑
+角
+觞
+解
+觥
+触
+觸
+言
+訂
+計
+訊
+討
+訓
+訕
+訖
+託
+記
+訛
+訝
+訟
+訣
+訥
+訪
+設
+許
+訳
+訴
+訶
+診
+註
+証
+詆
+詐
+詔
+評
+詛
+詞
+詠
+詡
+詢
+詣
+試
+詩
+詫
+詬
+詭
+詮
+詰
+話
+該
+詳
+詹
+詼
+誅
+誇
+誉
+誌
+認
+誓
+誕
+誘
+語
+誠
+誡
+誣
+誤
+誥
+誦
+誨
+說
+説
+読
+誰
+課
+誹
+誼
+調
+諄
+談
+請
+諏
+諒
+論
+諗
+諜
+諡
+諦
+諧
+諫
+諭
+諮
+諱
+諳
+諷
+諸
+諺
+諾
+謀
+謁
+謂
+謄
+謊
+謎
+謐
+謔
+謗
+謙
+講
+謝
+謠
+謨
+謬
+謹
+謾
+譁
+證
+譎
+譏
+識
+譙
+譚
+譜
+警
+譬
+譯
+議
+譲
+譴
+護
+譽
+讀
+變
+讓
+讚
+讞
+计
+订
+认
+讥
+讧
+讨
+让
+讪
+讫
+训
+议
+讯
+记
+讲
+讳
+讴
+讶
+讷
+许
+讹
+论
+讼
+讽
+设
+访
+诀
+证
+诃
+评
+诅
+识
+诈
+诉
+诊
+诋
+词
+诏
+译
+试
+诗
+诘
+诙
+诚
+诛
+话
+诞
+诟
+诠
+诡
+询
+诣
+诤
+该
+详
+诧
+诩
+诫
+诬
+语
+误
+诰
+诱
+诲
+说
+诵
+诶
+请
+诸
+诺
+读
+诽
+课
+诿
+谀
+谁
+调
+谄
+谅
+谆
+谈
+谊
+谋
+谌
+谍
+谎
+谏
+谐
+谑
+谒
+谓
+谔
+谕
+谗
+谘
+谙
+谚
+谛
+谜
+谟
+谢
+谣
+谤
+谥
+谦
+谧
+谨
+谩
+谪
+谬
+谭
+谯
+谱
+谲
+谴
+谶
+谷
+豁
+豆
+豇
+豈
+豉
+豊
+豌
+豎
+豐
+豔
+豚
+象
+豢
+豪
+豫
+豬
+豹
+豺
+貂
+貅
+貌
+貓
+貔
+貘
+貝
+貞
+負
+財
+貢
+貧
+貨
+販
+貪
+貫
+責
+貯
+貰
+貳
+貴
+貶
+買
+貸
+費
+貼
+貽
+貿
+賀
+賁
+賂
+賃
+賄
+資
+賈
+賊
+賑
+賓
+賜
+賞
+賠
+賡
+賢
+賣
+賤
+賦
+質
+賬
+賭
+賴
+賺
+購
+賽
+贅
+贈
+贊
+贍
+贏
+贓
+贖
+贛
+贝
+贞
+负
+贡
+财
+责
+贤
+败
+账
+货
+质
+贩
+贪
+贫
+贬
+购
+贮
+贯
+贰
+贱
+贲
+贴
+贵
+贷
+贸
+费
+贺
+贻
+贼
+贾
+贿
+赁
+赂
+赃
+资
+赅
+赈
+赊
+赋
+赌
+赎
+赏
+赐
+赓
+赔
+赖
+赘
+赚
+赛
+赝
+赞
+赠
+赡
+赢
+赣
+赤
+赦
+赧
+赫
+赭
+走
+赳
+赴
+赵
+赶
+起
+趁
+超
+越
+趋
+趕
+趙
+趟
+趣
+趨
+足
+趴
+趵
+趸
+趺
+趾
+跃
+跄
+跆
+跋
+跌
+跎
+跑
+跖
+跚
+跛
+距
+跟
+跡
+跤
+跨
+跩
+跪
+路
+跳
+践
+跷
+跹
+跺
+跻
+踉
+踊
+踌
+踏
+踐
+踝
+踞
+踟
+踢
+踩
+踪
+踮
+踱
+踴
+踵
+踹
+蹂
+蹄
+蹇
+蹈
+蹉
+蹊
+蹋
+蹑
+蹒
+蹙
+蹟
+蹣
+蹤
+蹦
+蹩
+蹬
+蹭
+蹲
+蹴
+蹶
+蹺
+蹼
+蹿
+躁
+躇
+躉
+躊
+躋
+躍
+躏
+躪
+身
+躬
+躯
+躲
+躺
+軀
+車
+軋
+軌
+軍
+軒
+軟
+転
+軸
+軼
+軽
+軾
+較
+載
+輒
+輓
+輔
+輕
+輛
+輝
+輟
+輩
+輪
+輯
+輸
+輻
+輾
+輿
+轄
+轅
+轆
+轉
+轍
+轎
+轟
+车
+轧
+轨
+轩
+转
+轭
+轮
+软
+轰
+轲
+轴
+轶
+轻
+轼
+载
+轿
+较
+辄
+辅
+辆
+辇
+辈
+辉
+辊
+辍
+辐
+辑
+输
+辕
+辖
+辗
+辘
+辙
+辛
+辜
+辞
+辟
+辣
+辦
+辨
+辩
+辫
+辭
+辮
+辯
+辰
+辱
+農
+边
+辺
+辻
+込
+辽
+达
+迁
+迂
+迄
+迅
+过
+迈
+迎
+运
+近
+返
+还
+这
+进
+远
+违
+连
+迟
+迢
+迤
+迥
+迦
+迩
+迪
+迫
+迭
+述
+迴
+迷
+迸
+迹
+迺
+追
+退
+送
+适
+逃
+逅
+逆
+选
+逊
+逍
+透
+逐
+递
+途
+逕
+逗
+這
+通
+逛
+逝
+逞
+速
+造
+逢
+連
+逮
+週
+進
+逵
+逶
+逸
+逻
+逼
+逾
+遁
+遂
+遅
+遇
+遊
+運
+遍
+過
+遏
+遐
+遑
+遒
+道
+達
+違
+遗
+遙
+遛
+遜
+遞
+遠
+遢
+遣
+遥
+遨
+適
+遭
+遮
+遲
+遴
+遵
+遶
+遷
+選
+遺
+遼
+遽
+避
+邀
+邁
+邂
+邃
+還
+邇
+邈
+邊
+邋
+邏
+邑
+邓
+邕
+邛
+邝
+邢
+那
+邦
+邨
+邪
+邬
+邮
+邯
+邰
+邱
+邳
+邵
+邸
+邹
+邺
+邻
+郁
+郅
+郊
+郎
+郑
+郜
+郝
+郡
+郢
+郤
+郦
+郧
+部
+郫
+郭
+郴
+郵
+郷
+郸
+都
+鄂
+鄉
+鄒
+鄔
+鄙
+鄞
+鄢
+鄧
+鄭
+鄰
+鄱
+鄲
+鄺
+酉
+酊
+酋
+酌
+配
+酐
+酒
+酗
+酚
+酝
+酢
+酣
+酥
+酩
+酪
+酬
+酮
+酯
+酰
+酱
+酵
+酶
+酷
+酸
+酿
+醃
+醇
+醉
+醋
+醍
+醐
+醒
+醚
+醛
+醜
+醞
+醣
+醪
+醫
+醬
+醮
+醯
+醴
+醺
+釀
+釁
+采
+釉
+释
+釋
+里
+重
+野
+量
+釐
+金
+釗
+釘
+釜
+針
+釣
+釦
+釧
+釵
+鈀
+鈉
+鈍
+鈎
+鈔
+鈕
+鈞
+鈣
+鈦
+鈪
+鈴
+鈺
+鈾
+鉀
+鉄
+鉅
+鉉
+鉑
+鉗
+鉚
+鉛
+鉤
+鉴
+鉻
+銀
+銃
+銅
+銑
+銓
+銖
+銘
+銜
+銬
+銭
+銮
+銳
+銷
+銹
+鋁
+鋅
+鋒
+鋤
+鋪
+鋰
+鋸
+鋼
+錄
+錐
+錘
+錚
+錠
+錢
+錦
+錨
+錫
+錮
+錯
+録
+錳
+錶
+鍊
+鍋
+鍍
+鍛
+鍥
+鍰
+鍵
+鍺
+鍾
+鎂
+鎊
+鎌
+鎏
+鎔
+鎖
+鎗
+鎚
+鎧
+鎬
+鎮
+鎳
+鏈
+鏖
+鏗
+鏘
+鏞
+鏟
+鏡
+鏢
+鏤
+鏽
+鐘
+鐮
+鐲
+鐳
+鐵
+鐸
+鐺
+鑄
+鑊
+鑑
+鑒
+鑣
+鑫
+鑰
+鑲
+鑼
+鑽
+鑾
+鑿
+针
+钉
+钊
+钎
+钏
+钒
+钓
+钗
+钙
+钛
+钜
+钝
+钞
+钟
+钠
+钡
+钢
+钣
+钤
+钥
+钦
+钧
+钨
+钩
+钮
+钯
+钰
+钱
+钳
+钴
+钵
+钺
+钻
+钼
+钾
+钿
+铀
+铁
+铂
+铃
+铄
+铅
+铆
+铉
+铎
+铐
+铛
+铜
+铝
+铠
+铡
+铢
+铣
+铤
+铨
+铩
+铬
+铭
+铮
+铰
+铲
+铵
+银
+铸
+铺
+链
+铿
+销
+锁
+锂
+锄
+锅
+锆
+锈
+锉
+锋
+锌
+锏
+锐
+锑
+错
+锚
+锟
+锡
+锢
+锣
+锤
+锥
+锦
+锭
+键
+锯
+锰
+锲
+锵
+锹
+锺
+锻
+镀
+镁
+镂
+镇
+镉
+镌
+镍
+镐
+镑
+镕
+镖
+镗
+镛
+镜
+镣
+镭
+镯
+镰
+镳
+镶
+長
+长
+門
+閃
+閉
+開
+閎
+閏
+閑
+閒
+間
+閔
+閘
+閡
+関
+閣
+閥
+閨
+閩
+閱
+閲
+閹
+閻
+閾
+闆
+闇
+闊
+闌
+闍
+闔
+闕
+闖
+闘
+關
+闡
+闢
+门
+闪
+闫
+闭
+问
+闯
+闰
+闲
+间
+闵
+闷
+闸
+闹
+闺
+闻
+闽
+闾
+阀
+阁
+阂
+阅
+阆
+阇
+阈
+阉
+阎
+阐
+阑
+阔
+阕
+阖
+阙
+阚
+阜
+队
+阡
+阪
+阮
+阱
+防
+阳
+阴
+阵
+阶
+阻
+阿
+陀
+陂
+附
+际
+陆
+陇
+陈
+陋
+陌
+降
+限
+陕
+陛
+陝
+陞
+陟
+陡
+院
+陣
+除
+陨
+险
+陪
+陰
+陲
+陳
+陵
+陶
+陷
+陸
+険
+陽
+隅
+隆
+隈
+隊
+隋
+隍
+階
+随
+隐
+隔
+隕
+隘
+隙
+際
+障
+隠
+隣
+隧
+隨
+險
+隱
+隴
+隶
+隸
+隻
+隼
+隽
+难
+雀
+雁
+雄
+雅
+集
+雇
+雉
+雋
+雌
+雍
+雎
+雏
+雑
+雒
+雕
+雖
+雙
+雛
+雜
+雞
+離
+難
+雨
+雪
+雯
+雰
+雲
+雳
+零
+雷
+雹
+電
+雾
+需
+霁
+霄
+霆
+震
+霈
+霉
+霊
+霍
+霎
+霏
+霑
+霓
+霖
+霜
+霞
+霧
+霭
+霰
+露
+霸
+霹
+霽
+霾
+靂
+靄
+靈
+青
+靓
+靖
+静
+靚
+靛
+靜
+非
+靠
+靡
+面
+靥
+靦
+革
+靳
+靴
+靶
+靼
+鞅
+鞋
+鞍
+鞏
+鞑
+鞘
+鞠
+鞣
+鞦
+鞭
+韆
+韋
+韌
+韓
+韜
+韦
+韧
+韩
+韬
+韭
+音
+韵
+韶
+韻
+響
+頁
+頂
+頃
+項
+順
+須
+頌
+預
+頑
+頒
+頓
+頗
+領
+頜
+頡
+頤
+頫
+頭
+頰
+頷
+頸
+頹
+頻
+頼
+顆
+題
+額
+顎
+顏
+顔
+願
+顛
+類
+顧
+顫
+顯
+顱
+顴
+页
+顶
+顷
+项
+顺
+须
+顼
+顽
+顾
+顿
+颁
+颂
+预
+颅
+领
+颇
+颈
+颉
+颊
+颌
+颍
+颐
+频
+颓
+颔
+颖
+颗
+题
+颚
+颛
+颜
+额
+颞
+颠
+颡
+颢
+颤
+颦
+颧
+風
+颯
+颱
+颳
+颶
+颼
+飄
+飆
+风
+飒
+飓
+飕
+飘
+飙
+飚
+飛
+飞
+食
+飢
+飨
+飩
+飪
+飯
+飲
+飼
+飽
+飾
+餃
+餅
+餉
+養
+餌
+餐
+餒
+餓
+餘
+餚
+餛
+餞
+餡
+館
+餮
+餵
+餾
+饅
+饈
+饋
+饌
+饍
+饑
+饒
+饕
+饗
+饞
+饥
+饨
+饪
+饬
+饭
+饮
+饯
+饰
+饱
+饲
+饴
+饵
+饶
+饷
+饺
+饼
+饽
+饿
+馀
+馁
+馄
+馅
+馆
+馈
+馋
+馍
+馏
+馒
+馔
+首
+馗
+香
+馥
+馨
+馬
+馭
+馮
+馳
+馴
+駁
+駄
+駅
+駆
+駐
+駒
+駕
+駛
+駝
+駭
+駱
+駿
+騁
+騎
+騏
+験
+騙
+騨
+騰
+騷
+驀
+驅
+驊
+驍
+驒
+驕
+驗
+驚
+驛
+驟
+驢
+驥
+马
+驭
+驮
+驯
+驰
+驱
+驳
+驴
+驶
+驷
+驸
+驹
+驻
+驼
+驾
+驿
+骁
+骂
+骄
+骅
+骆
+骇
+骈
+骊
+骋
+验
+骏
+骐
+骑
+骗
+骚
+骛
+骜
+骞
+骠
+骡
+骤
+骥
+骧
+骨
+骯
+骰
+骶
+骷
+骸
+骼
+髂
+髅
+髋
+髏
+髒
+髓
+體
+髖
+高
+髦
+髪
+髮
+髯
+髻
+鬃
+鬆
+鬍
+鬓
+鬚
+鬟
+鬢
+鬣
+鬥
+鬧
+鬱
+鬼
+魁
+魂
+魄
+魅
+魇
+魍
+魏
+魔
+魘
+魚
+魯
+魷
+鮑
+鮨
+鮪
+鮭
+鮮
+鯉
+鯊
+鯖
+鯛
+鯨
+鯰
+鯽
+鰍
+鰓
+鰭
+鰲
+鰻
+鰾
+鱈
+鱉
+鱔
+鱗
+鱷
+鱸
+鱼
+鱿
+鲁
+鲈
+鲍
+鲑
+鲛
+鲜
+鲟
+鲢
+鲤
+鲨
+鲫
+鲱
+鲲
+鲶
+鲷
+鲸
+鳃
+鳄
+鳅
+鳌
+鳍
+鳕
+鳖
+鳗
+鳝
+鳞
+鳥
+鳩
+鳳
+鳴
+鳶
+鴉
+鴕
+鴛
+鴦
+鴨
+鴻
+鴿
+鵑
+鵜
+鵝
+鵡
+鵬
+鵰
+鵲
+鶘
+鶩
+鶯
+鶴
+鷗
+鷲
+鷹
+鷺
+鸚
+鸞
+鸟
+鸠
+鸡
+鸢
+鸣
+鸥
+鸦
+鸨
+鸪
+鸭
+鸯
+鸳
+鸵
+鸽
+鸾
+鸿
+鹂
+鹃
+鹄
+鹅
+鹈
+鹉
+鹊
+鹌
+鹏
+鹑
+鹕
+鹘
+鹜
+鹞
+鹤
+鹦
+鹧
+鹫
+鹭
+鹰
+鹳
+鹵
+鹹
+鹼
+鹽
+鹿
+麂
+麋
+麒
+麓
+麗
+麝
+麟
+麥
+麦
+麩
+麴
+麵
+麸
+麺
+麻
+麼
+麽
+麾
+黃
+黄
+黍
+黎
+黏
+黑
+黒
+黔
+默
+黛
+黜
+黝
+點
+黠
+黨
+黯
+黴
+鼋
+鼎
+鼐
+鼓
+鼠
+鼬
+鼹
+鼻
+鼾
+齁
+齊
+齋
+齐
+齒
+齡
+齢
+齣
+齦
+齿
+龄
+龅
+龈
+龊
+龋
+龌
+龍
+龐
+龔
+龕
+龙
+龚
+龛
+龜
+龟
+︰
+︱
+︶
+︿
+﹁
+﹂
+﹍
+﹏
+﹐
+﹑
+﹒
+﹔
+﹕
+﹖
+﹗
+﹙
+﹚
+﹝
+﹞
+﹡
+﹣
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+^
+_
+`
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+。
+「
+」
+、
+・
+ッ
+ー
+イ
+ク
+シ
+ス
+ト
+ノ
+フ
+ラ
+ル
+ン
+゙
+゚
+ ̄
+¥
+👍
+🔥
+😂
+😎
+...
+yam
+10
+2017
+12
+11
+2016
+20
+30
+15
+06
+lofter
+##s
+2015
+by
+16
+14
+18
+13
+24
+17
+2014
+21
+##0
+22
+19
+25
+23
+com
+100
+00
+05
+2013
+##a
+03
+09
+08
+28
+##2
+50
+01
+04
+##1
+27
+02
+2012
+##3
+26
+##e
+07
+##8
+##5
+##6
+##4
+##9
+##7
+29
+2011
+40
+##t
+2010
+##o
+##d
+##i
+2009
+##n
+app
+www
+the
+##m
+31
+##c
+##l
+##y
+##r
+##g
+2008
+60
+http
+200
+qq
+##p
+80
+##f
+google
+pixnet
+90
+cookies
+tripadvisor
+500
+##er
+##k
+35
+##h
+facebook
+2007
+2000
+70
+##b
+of
+##x
+##u
+45
+300
+iphone
+32
+1000
+2006
+48
+ip
+36
+in
+38
+3d
+##w
+##ing
+55
+ctrip
+##on
+##v
+33
+##の
+to
+34
+400
+id
+2005
+it
+37
+windows
+llc
+top
+99
+42
+39
+000
+led
+at
+##an
+41
+51
+52
+46
+49
+43
+53
+44
+##z
+android
+58
+and
+59
+2004
+56
+vr
+##か
+5000
+2003
+47
+blogthis
+twitter
+54
+##le
+150
+ok
+2018
+57
+75
+cn
+no
+ios
+##in
+##mm
+##00
+800
+on
+te
+3000
+65
+2001
+360
+95
+ig
+lv
+120
+##ng
+##を
+##us
+##に
+pc
+てす
+──
+600
+##te
+85
+2002
+88
+##ed
+html
+ncc
+wifi
+email
+64
+blog
+is
+##10
+##て
+mail
+online
+##al
+dvd
+##ic
+studio
+##は
+##℃
+##ia
+##と
+line
+vip
+72
+##q
+98
+##ce
+##en
+for
+##is
+##ra
+##es
+##j
+usb
+net
+cp
+1999
+asia
+4g
+##cm
+diy
+new
+3c
+##お
+ta
+66
+language
+vs
+apple
+tw
+86
+web
+##ne
+ipad
+62
+you
+##re
+101
+68
+##tion
+ps
+de
+bt
+pony
+atm
+##2017
+1998
+67
+##ch
+ceo
+##or
+go
+##na
+av
+pro
+cafe
+96
+pinterest
+97
+63
+pixstyleme3c
+##ta
+more
+said
+##2016
+1997
+mp3
+700
+##ll
+nba
+jun
+##20
+92
+tv
+1995
+pm
+61
+76
+nbsp
+250
+##ie
+linux
+##ma
+cd
+110
+hd
+##17
+78
+##ion
+77
+6000
+am
+##th
+##st
+94
+##se
+##et
+69
+180
+gdp
+my
+105
+81
+abc
+89
+flash
+79
+one
+93
+1990
+1996
+##ck
+gps
+##も
+##ly
+web885
+106
+2020
+91
+##ge
+4000
+1500
+xd
+boss
+isbn
+1994
+org
+##ry
+me
+love
+##11
+0fork
+73
+##12
+3g
+##ter
+##ar
+71
+82
+##la
+hotel
+130
+1970
+pk
+83
+87
+140
+ie
+##os
+##30
+##el
+74
+##50
+seo
+cpu
+##ml
+p2p
+84
+may
+##る
+sun
+tue
+internet
+cc
+posted
+youtube
+##at
+##ン
+##man
+ii
+##ル
+##15
+abs
+nt
+pdf
+yahoo
+ago
+1980
+##it
+news
+mac
+104
+##てす
+##me
+##り
+java
+1992
+spa
+##de
+##nt
+hk
+all
+plus
+la
+1993
+##mb
+##16
+##ve
+west
+##da
+160
+air
+##い
+##ps
+から
+##to
+1989
+logo
+htc
+php
+https
+fi
+momo
+##son
+sat
+##ke
+##80
+ebd
+suv
+wi
+day
+apk
+##88
+##um
+mv
+galaxy
+wiki
+or
+brake
+##ス
+1200
+する
+this
+1991
+mon
+##こ
+❤2017
+po
+##ない
+javascript
+life
+home
+june
+##ss
+system
+900
+##ー
+##0
+pp
+1988
+world
+fb
+4k
+br
+##as
+ic
+ai
+leonardo
+safari
+##60
+live
+free
+xx
+wed
+win7
+kiehl
+##co
+lg
+o2o
+##go
+us
+235
+1949
+mm
+しい
+vfm
+kanye
+##90
+##2015
+##id
+jr
+##ey
+123
+rss
+##sa
+##ro
+##am
+##no
+thu
+fri
+350
+##sh
+##ki
+103
+comments
+name
+##のて
+##pe
+##ine
+max
+1987
+8000
+uber
+##mi
+##ton
+wordpress
+office
+1986
+1985
+##ment
+107
+bd
+win10
+##ld
+##li
+gmail
+bb
+dior
+##rs
+##ri
+##rd
+##ます
+up
+cad
+##®
+dr
+して
+read
+##21
+をお
+##io
+##99
+url
+1984
+pvc
+paypal
+show
+policy
+##40
+##ty
+##18
+with
+##★
+##01
+txt
+102
+##ba
+dna
+from
+post
+mini
+ar
+taiwan
+john
+##ga
+privacy
+agoda
+##13
+##ny
+word
+##24
+##22
+##by
+##ur
+##hz
+1982
+##ang
+265
+cookie
+netscape
+108
+##ka
+##~
+##ad
+house
+share
+note
+ibm
+code
+hello
+nike
+sim
+survey
+##016
+1979
+1950
+wikia
+##32
+##017
+5g
+cbc
+##tor
+##kg
+1983
+##rt
+##14
+campaign
+store
+2500
+os
+##ct
+##ts
+##°
+170
+api
+##ns
+365
+excel
+##な
+##ao
+##ら
+##し
+~~
+##nd
+university
+163
+には
+518
+##70
+##ya
+##il
+##25
+pierre
+ipo
+0020
+897
+##23
+hotels
+##ian
+のお
+125
+years
+6606
+##ers
+##26
+high
+##day
+time
+##ay
+bug
+##line
+##く
+##す
+##be
+xp
+talk2yam
+yamservice
+10000
+coco
+##dy
+sony
+##ies
+1978
+microsoft
+david
+people
+##ha
+1960
+instagram
+intel
+その
+##ot
+iso
+1981
+##va
+115
+##mo
+##land
+xxx
+man
+co
+ltxsw
+##ation
+baby
+220
+##pa
+##ol
+1945
+7000
+tag
+450
+##ue
+msn
+##31
+oppo
+##ト
+##ca
+control
+##om
+st
+chrome
+##ure
+##ん
+be
+##き
+lol
+##19
+した
+##bo
+240
+lady
+##100
+##way
+##から
+4600
+##ko
+##do
+##un
+4s
+corporation
+168
+##ni
+herme
+##28
+cp
+978
+##up
+##06
+ui
+##ds
+ppt
+admin
+three
+します
+bbc
+re
+128
+##48
+ca
+##015
+##35
+hp
+##ee
+tpp
+##た
+##ive
+××
+root
+##cc
+##ました
+##ble
+##ity
+adobe
+park
+114
+et
+oled
+city
+##ex
+##ler
+##ap
+china
+##book
+20000
+view
+##ice
+global
+##km
+your
+hong
+##mg
+out
+##ms
+ng
+ebay
+##29
+menu
+ubuntu
+##cy
+rom
+##view
+open
+ktv
+do
+server
+##lo
+if
+english
+##ね
+##5
+##oo
+1600
+##02
+step1
+kong
+club
+135
+july
+inc
+1976
+mr
+hi
+##net
+touch
+##ls
+##ii
+michael
+lcd
+##05
+##33
+phone
+james
+step2
+1300
+ios9
+##box
+dc
+##2
+##ley
+samsung
+111
+280
+pokemon
+css
+##ent
+##les
+いいえ
+##1
+s8
+atom
+play
+bmw
+##said
+sa
+etf
+ctrl
+♥yoyo♥
+##55
+2025
+##2014
+##66
+adidas
+amazon
+1958
+##ber
+##ner
+visa
+##77
+##der
+1800
+connectivity
+##hi
+firefox
+109
+118
+hr
+so
+style
+mark
+pop
+ol
+skip
+1975
+as
+##27
+##ir
+##61
+190
+mba
+##う
+##ai
+le
+##ver
+1900
+cafe2017
+lte
+super
+113
+129
+##ron
+amd
+like
+##☆
+are
+##ster
+we
+##sk
+paul
+data
+international
+##ft
+longchamp
+ssd
+good
+##ート
+##ti
+reply
+##my
+↓↓↓
+apr
+star
+##ker
+source
+136
+js
+112
+get
+force
+photo
+##one
+126
+##2013
+##ow
+link
+bbs
+1972
+goods
+##lin
+python
+119
+##ip
+game
+##ics
+##ません
+blue
+##●
+520
+##45
+page
+itunes
+##03
+1955
+260
+1968
+gt
+gif
+618
+##ff
+##47
+group
+くたさい
+about
+bar
+ganji
+##nce
+music
+lee
+not
+1977
+1971
+1973
+##per
+an
+faq
+comment
+##って
+days
+##ock
+116
+##bs
+1974
+1969
+v1
+player
+1956
+xbox
+sql
+fm
+f1
+139
+##ah
+210
+##lv
+##mp
+##000
+melody
+1957
+##3
+550
+17life
+199
+1966
+xml
+market
+##au
+##71
+999
+##04
+what
+gl
+##95
+##age
+tips
+##68
+book
+##ting
+mysql
+can
+1959
+230
+##ung
+wonderland
+watch
+10℃
+##ction
+9000
+mar
+mobile
+1946
+1962
+article
+##db
+part
+▲top
+party
+って
+1967
+1964
+1948
+##07
+##ore
+##op
+この
+dj
+##78
+##38
+010
+main
+225
+1965
+##ong
+art
+320
+ad
+134
+020
+##73
+117
+pm2
+japan
+228
+##08
+ts
+1963
+##ica
+der
+sm
+##36
+2019
+##wa
+ct
+##7
+##や
+##64
+1937
+homemesh
+search
+##85
+##れは
+##tv
+##di
+macbook
+##9
+##くたさい
+service
+##♥
+type
+った
+750
+##ier
+##si
+##75
+##います
+##ok
+best
+##ット
+goris
+lock
+##った
+cf
+3m
+big
+##ut
+ftp
+carol
+##vi
+10
+1961
+happy
+sd
+##ac
+122
+anti
+pe
+cnn
+iii
+1920
+138
+##ラ
+1940
+esp
+jan
+tags
+##98
+##51
+august
+vol
+##86
+154
+##™
+##fs
+##れ
+##sion
+design
+ac
+##ム
+press
+jordan
+ppp
+that
+key
+check
+##6
+##tt
+##㎡
+1080p
+##lt
+power
+##42
+1952
+##bc
+vivi
+##ック
+he
+133
+121
+jpg
+##rry
+201
+175
+3500
+1947
+nb
+##ted
+##rn
+しています
+1954
+usd
+##t00
+master
+##ンク
+001
+model
+##58
+al
+##09
+1953
+##34
+ram
+goo
+ても
+##ui
+127
+1930
+red
+##ary
+rpg
+item
+##pm
+##41
+270
+##za
+project
+##2012
+hot
+td
+blogabstract
+##ger
+##62
+650
+##44
+gr2
+##します
+##m
+black
+electronic
+nfc
+year
+asus
+また
+html5
+cindy
+##hd
+m3
+132
+esc
+##od
+booking
+##53
+fed
+tvb
+##81
+##ina
+mit
+165
+##いる
+chan
+192
+distribution
+next
+になる
+peter
+bios
+steam
+cm
+1941
+にも
+pk10
+##ix
+##65
+##91
+dec
+nasa
+##ana
+icecat
+00z
+b1
+will
+##46
+li
+se
+##ji
+##み
+##ard
+oct
+##ain
+jp
+##ze
+##bi
+cio
+##56
+smart
+h5
+##39
+##port
+curve
+vpn
+##nm
+##dia
+utc
+##あり
+12345678910
+##52
+rmvb
+chanel
+a4
+miss
+##and
+##im
+media
+who
+##63
+she
+girl
+5s
+124
+vera
+##して
+class
+vivo
+king
+##フ
+##ei
+national
+ab
+1951
+5cm
+888
+145
+ipod
+ap
+1100
+5mm
+211
+ms
+2756
+##69
+mp4
+msci
+##po
+##89
+131
+mg
+index
+380
+##bit
+##out
+##zz
+##97
+##67
+158
+apec
+##8
+photoshop
+opec
+¥799
+ては
+##96
+##tes
+##ast
+2g
+○○
+##ール
+¥2899
+##ling
+##よ
+##ory
+1938
+##ical
+kitty
+content
+##43
+step3
+##cn
+win8
+155
+vc
+1400
+iphone7
+robert
+##した
+tcl
+137
+beauty
+##87
+en
+dollars
+##ys
+##oc
+step
+pay
+yy
+a1
+##2011
+##lly
+##ks
+##♪
+1939
+188
+download
+1944
+sep
+exe
+ph
+います
+school
+gb
+center
+pr
+street
+##board
+uv
+##37
+##lan
+winrar
+##que
+##ua
+##com
+1942
+1936
+480
+gpu
+##4
+ettoday
+fu
+tom
+##54
+##ren
+##via
+149
+##72
+b2b
+144
+##79
+##tch
+rose
+arm
+mb
+##49
+##ial
+##nn
+nvidia
+step4
+mvp
+00㎡
+york
+156
+##イ
+how
+cpi
+591
+2765
+gov
+kg
+joe
+##xx
+mandy
+pa
+##ser
+copyright
+fashion
+1935
+don
+##け
+ecu
+##ist
+##art
+erp
+wap
+have
+##lm
+talk
+##ek
+##ning
+##if
+ch
+##ite
+video
+1943
+cs
+san
+iot
+look
+##84
+##2010
+##ku
+october
+##ux
+trump
+##hs
+##ide
+box
+141
+first
+##ins
+april
+##ight
+##83
+185
+angel
+protected
+aa
+151
+162
+x1
+m2
+##fe
+##×
+##ho
+size
+143
+min
+ofo
+fun
+gomaji
+ex
+hdmi
+food
+dns
+march
+chris
+kevin
+##のか
+##lla
+##pp
+##ec
+ag
+ems
+6s
+720p
+##rm
+##ham
+off
+##92
+asp
+team
+fandom
+ed
+299
+▌♥
+##ell
+info
+されています
+##82
+sina
+4066
+161
+##able
+##ctor
+330
+399
+315
+dll
+rights
+ltd
+idc
+jul
+3kg
+1927
+142
+ma
+surface
+##76
+##ク
+~~~
+304
+mall
+eps
+146
+green
+##59
+map
+space
+donald
+v2
+sodu
+##light
+1931
+148
+1700
+まて
+310
+reserved
+htm
+##han
+##57
+2d
+178
+mod
+##ise
+##tions
+152
+ti
+##shi
+doc
+1933
+icp
+055
+wang
+##ram
+shopping
+aug
+##pi
+##well
+now
+wam
+b2
+からお
+##hu
+236
+1928
+##gb
+266
+f2
+##93
+153
+mix
+##ef
+##uan
+bwl
+##plus
+##res
+core
+##ess
+tea
+5℃
+hktvmall
+nhk
+##ate
+list
+##ese
+301
+feb
+4m
+inn
+ての
+nov
+159
+12345
+daniel
+##ci
+pass
+##bet
+##nk
+coffee
+202
+ssl
+airbnb
+##ute
+fbi
+woshipm
+skype
+ea
+cg
+sp
+##fc
+##www
+yes
+edge
+alt
+007
+##94
+fpga
+##ght
+##gs
+iso9001
+さい
+##ile
+##wood
+##uo
+image
+lin
+icon
+american
+##em
+1932
+set
+says
+##king
+##tive
+blogger
+##74
+なと
+256
+147
+##ox
+##zy
+##red
+##ium
+##lf
+nokia
+claire
+##リ
+##ding
+november
+lohas
+##500
+##tic
+##マ
+##cs
+##ある
+##che
+##ire
+##gy
+##ult
+db
+january
+win
+##カ
+166
+road
+ptt
+##ま
+##つ
+198
+##fa
+##mer
+anna
+pchome
+はい
+udn
+ef
+420
+##time
+##tte
+2030
+##ア
+g20
+white
+かかります
+1929
+308
+garden
+eleven
+di
+##おります
+chen
+309b
+777
+172
+young
+cosplay
+ちてない
+4500
+bat
+##123
+##tra
+##ては
+kindle
+npc
+steve
+etc
+##ern
+##|
+call
+xperia
+ces
+travel
+sk
+s7
+##ous
+1934
+##int
+みいたたけます
+183
+edu
+file
+cho
+qr
+##car
+##our
+186
+##ant
+##d
+eric
+1914
+rends
+##jo
+##する
+mastercard
+##2000
+kb
+##min
+290
+##ino
+vista
+##ris
+##ud
+jack
+2400
+##set
+169
+pos
+1912
+##her
+##ou
+taipei
+しく
+205
+beta
+##ませんか
+232
+##fi
+express
+255
+body
+##ill
+aphojoy
+user
+december
+meiki
+##ick
+tweet
+richard
+##av
+##ᆫ
+iphone6
+##dd
+ちてすか
+views
+##mark
+321
+pd
+##00
+times
+##▲
+level
+##ash
+10g
+point
+5l
+##ome
+208
+koreanmall
+##ak
+george
+q2
+206
+wma
+tcp
+##200
+スタッフ
+full
+mlb
+##lle
+##watch
+tm
+run
+179
+911
+smith
+business
+##und
+1919
+color
+##tal
+222
+171
+##less
+moon
+4399
+##rl
+update
+pcb
+shop
+499
+157
+little
+なし
+end
+##mhz
+van
+dsp
+easy
+660
+##house
+##key
+history
+##o
+oh
+##001
+##hy
+##web
+oem
+let
+was
+##2009
+##gg
+review
+##wan
+182
+##°c
+203
+uc
+title
+##val
+united
+233
+2021
+##ons
+doi
+trivago
+overdope
+sbs
+##ance
+##ち
+grand
+special
+573032185
+imf
+216
+wx17house
+##so
+##ーム
+audi
+##he
+london
+william
+##rp
+##ake
+science
+beach
+cfa
+amp
+ps4
+880
+##800
+##link
+##hp
+crm
+ferragamo
+bell
+make
+##eng
+195
+under
+zh
+photos
+2300
+##style
+##ント
+via
+176
+da
+##gi
+company
+i7
+##ray
+thomas
+370
+ufo
+i5
+##max
+plc
+ben
+back
+research
+8g
+173
+mike
+##pc
+##ッフ
+september
+189
+##ace
+vps
+february
+167
+pantos
+wp
+lisa
+1921
+★★
+jquery
+night
+long
+offer
+##berg
+##news
+1911
+##いて
+ray
+fks
+wto
+せます
+over
+164
+340
+##all
+##rus
+1924
+##888
+##works
+blogtitle
+loftpermalink
+##→
+187
+martin
+test
+ling
+km
+##め
+15000
+fda
+v3
+##ja
+##ロ
+wedding
+かある
+outlet
+family
+##ea
+をこ
+##top
+story
+##ness
+salvatore
+##lu
+204
+swift
+215
+room
+している
+oracle
+##ul
+1925
+sam
+b2c
+week
+pi
+rock
+##のは
+##a
+##けと
+##ean
+##300
+##gle
+cctv
+after
+chinese
+##back
+powered
+x2
+##tan
+1918
+##nes
+##イン
+canon
+only
+181
+##zi
+##las
+say
+##oe
+184
+##sd
+221
+##bot
+##world
+##zo
+sky
+made
+top100
+just
+1926
+pmi
+802
+234
+gap
+##vr
+177
+les
+174
+▲topoct
+ball
+vogue
+vi
+ing
+ofweek
+cos
+##list
+##ort
+▲topmay
+##なら
+##lon
+として
+last
+##tc
+##of
+##bus
+##gen
+real
+eva
+##コ
+a3
+nas
+##lie
+##ria
+##coin
+##bt
+▲topapr
+his
+212
+cat
+nata
+vive
+health
+⋯⋯
+drive
+sir
+▲topmar
+du
+cup
+##カー
+##ook
+##よう
+##sy
+alex
+msg
+tour
+しました
+3ce
+##word
+193
+ebooks
+r8
+block
+318
+##より
+2200
+nice
+pvp
+207
+months
+1905
+rewards
+##ther
+1917
+0800
+##xi
+##チ
+##sc
+micro
+850
+gg
+blogfp
+op
+1922
+daily
+m1
+264
+true
+##bb
+ml
+##tar
+##のお
+##ky
+anthony
+196
+253
+##yo
+state
+218
+##ara
+##aa
+##rc
+##tz
+##ston
+より
+gear
+##eo
+##ade
+ge
+see
+1923
+##win
+##ura
+ss
+heart
+##den
+##ita
+down
+##sm
+el
+png
+2100
+610
+rakuten
+whatsapp
+bay
+dream
+add
+##use
+680
+311
+pad
+gucci
+mpv
+##ode
+##fo
+island
+▲topjun
+##▼
+223
+jason
+214
+chicago
+##❤
+しの
+##hone
+io
+##れる
+##ことか
+sogo
+be2
+##ology
+990
+cloud
+vcd
+##con
+2~3
+##ford
+##joy
+##kb
+##こさいます
+##rade
+but
+##ach
+docker
+##ful
+rfid
+ul
+##ase
+hit
+ford
+##star
+580
+##○
+11
+a2
+sdk
+reading
+edited
+##are
+cmos
+##mc
+238
+siri
+light
+##ella
+##ため
+bloomberg
+##read
+pizza
+##ison
+jimmy
+##vm
+college
+node
+journal
+ba
+18k
+##play
+245
+##cer
+20
+magic
+##yu
+191
+jump
+288
+tt
+##ings
+asr
+##lia
+3200
+step5
+network
+##cd
+mc
+いします
+1234
+pixstyleme
+273
+##600
+2800
+money
+★★★★★
+1280
+12
+430
+bl
+みの
+act
+##tus
+tokyo
+##rial
+##life
+emba
+##ae
+saas
+tcs
+##rk
+##wang
+summer
+##sp
+ko
+##ving
+390
+premium
+##その
+netflix
+##ヒ
+uk
+mt
+##lton
+right
+frank
+two
+209
+える
+##ple
+##cal
+021
+##んな
+##sen
+##ville
+hold
+nexus
+dd
+##ius
+てお
+##mah
+##なく
+tila
+zero
+820
+ce
+##tin
+resort
+##ws
+charles
+old
+p10
+5d
+report
+##360
+##ru
+##には
+bus
+vans
+lt
+##est
+pv
+##レ
+links
+rebecca
+##ツ
+##dm
+azure
+##365
+きな
+limited
+bit
+4gb
+##mon
+1910
+moto
+##eam
+213
+1913
+var
+eos
+なとの
+226
+blogspot
+された
+699
+e3
+dos
+dm
+fc
+##ments
+##ik
+##kw
+boy
+##bin
+##ata
+960
+er
+##せ
+219
+##vin
+##tu
+##ula
+194
+##∥
+station
+##ろ
+##ature
+835
+files
+zara
+hdr
+top10
+nature
+950
+magazine
+s6
+marriott
+##シ
+avira
+case
+##っと
+tab
+##ran
+tony
+##home
+oculus
+im
+##ral
+jean
+saint
+cry
+307
+rosie
+##force
+##ini
+ice
+##bert
+のある
+##nder
+##mber
+pet
+2600
+##◆
+plurk
+▲topdec
+##sis
+00kg
+▲topnov
+720
+##ence
+tim
+##ω
+##nc
+##ても
+##name
+log
+ips
+great
+ikea
+malaysia
+unix
+##イト
+3600
+##ncy
+##nie
+12000
+akb48
+##ye
+##oid
+404
+##chi
+##いた
+oa
+xuehai
+##1000
+##orm
+##rf
+275
+さん
+##ware
+##リー
+980
+ho
+##pro
+text
+##era
+560
+bob
+227
+##ub
+##2008
+8891
+scp
+avi
+##zen
+2022
+mi
+wu
+museum
+qvod
+apache
+lake
+jcb
+▲topaug
+★★★
+ni
+##hr
+hill
+302
+ne
+weibo
+490
+ruby
+##ーシ
+##ヶ
+##row
+4d
+▲topjul
+iv
+##ish
+github
+306
+mate
+312
+##スト
+##lot
+##ane
+andrew
+のハイト
+##tina
+t1
+rf
+ed2k
+##vel
+##900
+way
+final
+りの
+ns
+5a
+705
+197
+##メ
+sweet
+bytes
+##ene
+▲topjan
+231
+##cker
+##2007
+##px
+100g
+topapp
+229
+helpapp
+rs
+low
+14k
+g4g
+care
+630
+ldquo
+あり
+##fork
+leave
+rm
+edition
+##gan
+##zon
+##qq
+▲topsep
+##google
+##ism
+gold
+224
+explorer
+##zer
+toyota
+category
+select
+visual
+##labels
+restaurant
+##md
+posts
+s1
+##ico
+もっと
+angelababy
+123456
+217
+sports
+s3
+mbc
+1915
+してくたさい
+shell
+x86
+candy
+##new
+kbs
+face
+xl
+470
+##here
+4a
+swissinfo
+v8
+▲topfeb
+dram
+##ual
+##vice
+3a
+##wer
+sport
+q1
+ios10
+public
+int
+card
+##c
+ep
+au
+rt
+##れた
+1080
+bill
+##mll
+kim
+30
+460
+wan
+##uk
+##ミ
+x3
+298
+0t
+scott
+##ming
+239
+e5
+##3d
+h7n9
+worldcat
+brown
+##あります
+##vo
+##led
+##580
+##ax
+249
+410
+##ert
+paris
+##~6
+polo
+925
+##lr
+599
+##ナ
+capital
+##hing
+bank
+cv
+1g
+##chat
+##s
+##たい
+adc
+##ule
+2m
+##e
+digital
+hotmail
+268
+##pad
+870
+bbq
+quot
+##ring
+before
+wali
+##まて
+mcu
+2k
+2b
+という
+costco
+316
+north
+333
+switch
+##city
+##p
+philips
+##mann
+management
+panasonic
+##cl
+##vd
+##ping
+##rge
+alice
+##lk
+##ましょう
+css3
+##ney
+vision
+alpha
+##ular
+##400
+##tter
+lz
+にお
+##ありません
+mode
+gre
+1916
+pci
+##tm
+237
+1~2
+##yan
+##そ
+について
+##let
+##キ
+work
+war
+coach
+ah
+mary
+##ᅵ
+huang
+##pt
+a8
+pt
+follow
+##berry
+1895
+##ew
+a5
+ghost
+##ション
+##wn
+##og
+south
+##code
+girls
+##rid
+action
+villa
+git
+r11
+table
+games
+##cket
+error
+##anonymoussaid
+##ag
+here
+##ame
+##gc
+qa
+##■
+##lis
+gmp
+##gin
+vmalife
+##cher
+yu
+wedding
+##tis
+demo
+dragon
+530
+soho
+social
+bye
+##rant
+river
+orz
+acer
+325
+##↑
+##ース
+##ats
+261
+del
+##ven
+440
+ups
+##ように
+##ター
+305
+value
+macd
+yougou
+##dn
+661
+##ano
+ll
+##urt
+##rent
+continue
+script
+##wen
+##ect
+paper
+263
+319
+shift
+##chel
+##フト
+##cat
+258
+x5
+fox
+243
+##さん
+car
+aaa
+##blog
+loading
+##yn
+##tp
+kuso
+799
+si
+sns
+イカせるテンマ
+ヒンクテンマ3
+rmb
+vdc
+forest
+central
+prime
+help
+ultra
+##rmb
+##ような
+241
+square
+688
+##しい
+のないフロクに
+##field
+##reen
+##ors
+##ju
+c1
+start
+510
+##air
+##map
+cdn
+##wo
+cba
+stephen
+m8
+100km
+##get
+opera
+##base
+##ood
+vsa
+com™
+##aw
+##ail
+251
+なのて
+count
+t2
+##ᅡ
+##een
+2700
+hop
+##gp
+vsc
+tree
+##eg
+##ose
+816
+285
+##ories
+##shop
+alphago
+v4
+1909
+simon
+##ᆼ
+fluke62max
+zip
+スホンサー
+##sta
+louis
+cr
+bas
+##~10
+bc
+##yer
+hadoop
+##ube
+##wi
+1906
+0755
+hola
+##low
+place
+centre
+5v
+d3
+##fer
+252
+##750
+##media
+281
+540
+0l
+exchange
+262
+series
+##ハー
+##san
+eb
+##bank
+##k
+q3
+##nge
+##mail
+take
+##lp
+259
+1888
+client
+east
+cache
+event
+vincent
+##ールを
+きを
+##nse
+sui
+855
+adchoice
+##и
+##stry
+##なたの
+246
+##zone
+ga
+apps
+sea
+##ab
+248
+cisco
+##タ
+##rner
+kymco
+##care
+dha
+##pu
+##yi
+minkoff
+royal
+p1
+への
+annie
+269
+collection
+kpi
+playstation
+257
+になります
+866
+bh
+##bar
+queen
+505
+radio
+1904
+andy
+armani
+##xy
+manager
+iherb
+##ery
+##share
+spring
+raid
+johnson
+1908
+##ob
+volvo
+hall
+##ball
+v6
+our
+taylor
+##hk
+bi
+242
+##cp
+kate
+bo
+water
+technology
+##rie
+サイトは
+277
+##ona
+##sl
+hpv
+303
+gtx
+hip
+rdquo
+jayz
+stone
+##lex
+##rum
+namespace
+##やり
+620
+##ale
+##atic
+des
+##erson
+##ql
+##ves
+##type
+enter
+##この
+##てきます
+d2
+##168
+##mix
+##bian
+との
+a9
+jj
+ky
+##lc
+access
+movie
+##hc
+リストに
+tower
+##ration
+##mit
+ます
+##nch
+ua
+tel
+prefix
+##o2
+1907
+##point
+1901
+ott
+~10
+##http
+##ury
+baidu
+##ink
+member
+##logy
+bigbang
+nownews
+##js
+##shot
+##tb
+##こと
+247
+eba
+##tics
+##lus
+ける
+v5
+spark
+##ama
+there
+##ions
+god
+##lls
+##down
+hiv
+##ress
+burberry
+day2
+##kv
+◆◆
+jeff
+related
+film
+edit
+joseph
+283
+##ark
+cx
+32gb
+order
+g9
+30000
+##ans
+##tty
+s5
+##bee
+かあります
+thread
+xr
+buy
+sh
+005
+land
+spotify
+mx
+##ari
+276
+##verse
+×email
+sf
+why
+##ことて
+244
+7headlines
+nego
+sunny
+dom
+exo
+401
+666
+positioning
+fit
+rgb
+##tton
+278
+kiss
+alexa
+adam
+lp
+みリストを
+##g
+mp
+##ties
+##llow
+amy
+##du
+np
+002
+institute
+271
+##rth
+##lar
+2345
+590
+##des
+sidebar
+15
+imax
+site
+##cky
+##kit
+##ime
+##009
+season
+323
+##fun
+##ンター
+##ひ
+gogoro
+a7
+pu
+lily
+fire
+twd600
+##ッセーシを
+いて
+##vis
+30ml
+##cture
+##をお
+information
+##オ
+close
+friday
+##くれる
+yi
+nick
+てすか
+##tta
+##tel
+6500
+##lock
+cbd
+economy
+254
+かお
+267
+tinker
+double
+375
+8gb
+voice
+##app
+oops
+channel
+today
+985
+##right
+raw
+xyz
+##+
+jim
+edm
+##cent
+7500
+supreme
+814
+ds
+##its
+##asia
+dropbox
+##てすか
+##tti
+books
+272
+100ml
+##tle
+##ller
+##ken
+##more
+##boy
+sex
+309
+##dom
+t3
+##ider
+##なります
+##unch
+1903
+810
+feel
+5500
+##かった
+##put
+により
+s2
+mo
+##gh
+men
+ka
+amoled
+div
+##tr
+##n1
+port
+howard
+##tags
+ken
+dnf
+##nus
+adsense
+##а
+ide
+##へ
+buff
+thunder
+##town
+##ique
+has
+##body
+auto
+pin
+##erry
+tee
+てした
+295
+number
+##the
+##013
+object
+psp
+cool
+udnbkk
+16gb
+##mic
+miui
+##tro
+most
+r2
+##alk
+##nity
+1880
+±0
+##いました
+428
+s4
+law
+version
+##oa
+n1
+sgs
+docomo
+##tf
+##ack
+henry
+fc2
+##ded
+##sco
+##014
+##rite
+286
+0mm
+linkedin
+##ada
+##now
+wii
+##ndy
+ucbug
+##◎
+sputniknews
+legalminer
+##ika
+##xp
+2gb
+##bu
+q10
+oo
+b6
+come
+##rman
+cheese
+ming
+maker
+##gm
+nikon
+##fig
+ppi
+kelly
+##ります
+jchere
+てきます
+ted
+md
+003
+fgo
+tech
+##tto
+dan
+soc
+##gl
+##len
+hair
+earth
+640
+521
+img
+##pper
+##a1
+##てきる
+##ロク
+acca
+##ition
+##ference
+suite
+##ig
+outlook
+##mond
+##cation
+398
+##pr
+279
+101vip
+358
+##999
+282
+64gb
+3800
+345
+airport
+##over
+284
+##おり
+jones
+##ith
+lab
+##su
+##いるのて
+co2
+town
+piece
+##llo
+no1
+vmware
+24h
+##qi
+focus
+reader
+##admin
+##ora
+tb
+false
+##log
+1898
+know
+lan
+838
+##ces
+f4
+##ume
+motel
+stop
+##oper
+na
+flickr
+netcomponents
+##af
+##─
+pose
+williams
+local
+##ound
+##cg
+##site
+##iko
+いお
+274
+5m
+gsm
+con
+##ath
+1902
+friends
+##hip
+cell
+317
+##rey
+780
+cream
+##cks
+012
+##dp
+facebooktwitterpinterestgoogle
+sso
+324
+shtml
+song
+swiss
+##mw
+##キンク
+lumia
+xdd
+string
+tiffany
+522
+marc
+られた
+insee
+russell
+sc
+dell
+##ations
+ok
+camera
+289
+##vs
+##flow
+##late
+classic
+287
+##nter
+stay
+g1
+mtv
+512
+##ever
+##lab
+##nger
+qe
+sata
+ryan
+d1
+50ml
+cms
+##cing
+su
+292
+3300
+editor
+296
+##nap
+security
+sunday
+association
+##ens
+##700
+##bra
+acg
+##かり
+sofascore
+とは
+mkv
+##ign
+jonathan
+gary
+build
+labels
+##oto
+tesla
+moba
+qi
+gohappy
+general
+ajax
+1024
+##かる
+サイト
+society
+##test
+##urs
+wps
+fedora
+##ich
+mozilla
+328
+##480
+##dr
+usa
+urn
+##lina
+##r
+grace
+##die
+##try
+##ader
+1250
+##なり
+elle
+570
+##chen
+##ᆯ
+price
+##ten
+uhz
+##ough
+eq
+##hen
+states
+push
+session
+balance
+wow
+506
+##cus
+##py
+when
+##ward
+##ep
+34e
+wong
+library
+prada
+##サイト
+##cle
+running
+##ree
+313
+ck
+date
+q4
+##ctive
+##ool
+##>
+mk
+##ira
+##163
+388
+die
+secret
+rq
+dota
+buffet
+は1ヶ
+e6
+##ez
+pan
+368
+ha
+##card
+##cha
+2a
+##さ
+alan
+day3
+eye
+f3
+##end
+france
+keep
+adi
+rna
+tvbs
+##ala
+solo
+nova
+##え
+##tail
+##ょう
+support
+##ries
+##なる
+##ved
+base
+copy
+iis
+fps
+##ways
+hero
+hgih
+profile
+fish
+mu
+ssh
+entertainment
+chang
+##wd
+click
+cake
+##ond
+pre
+##tom
+kic
+pixel
+##ov
+##fl
+product
+6a
+##pd
+dear
+##gate
+es
+yumi
+audio
+##²
+##sky
+echo
+bin
+where
+##ture
+329
+##ape
+find
+sap
+isis
+##なと
+nand
+##101
+##load
+##ream
+band
+a6
+525
+never
+##post
+festival
+50cm
+##we
+555
+guide
+314
+zenfone
+##ike
+335
+gd
+forum
+jessica
+strong
+alexander
+##ould
+software
+allen
+##ious
+program
+360°
+else
+lohasthree
+##gar
+することかてきます
+please
+##れます
+rc
+##ggle
+##ric
+bim
+50000
+##own
+eclipse
+355
+brian
+3ds
+##side
+061
+361
+##other
+##ける
+##tech
+##ator
+485
+engine
+##ged
+##t
+plaza
+##fit
+cia
+ngo
+westbrook
+shi
+tbs
+50mm
+##みませんか
+sci
+291
+reuters
+##ily
+contextlink
+##hn
+af
+##cil
+bridge
+very
+##cel
+1890
+cambridge
+##ize
+15g
+##aid
+##data
+790
+frm
+##head
+award
+butler
+##sun
+meta
+##mar
+america
+ps3
+puma
+pmid
+##すか
+lc
+670
+kitchen
+##lic
+オーフン5
+きなしソフトサーヒス
+そして
+day1
+future
+★★★★
+##text
+##page
+##rris
+pm1
+##ket
+fans
+##っています
+1001
+christian
+bot
+kids
+trackback
+##hai
+c3
+display
+##hl
+n2
+1896
+idea
+さんも
+##sent
+airmail
+##ug
+##men
+pwm
+けます
+028
+##lution
+369
+852
+awards
+schemas
+354
+asics
+wikipedia
+font
+##tional
+##vy
+c2
+293
+##れている
+##dget
+##ein
+っている
+contact
+pepper
+スキル
+339
+##~5
+294
+##uel
+##ument
+730
+##hang
+みてす
+q5
+##sue
+rain
+##ndi
+wei
+swatch
+##cept
+わせ
+331
+popular
+##ste
+##tag
+p2
+501
+trc
+1899
+##west
+##live
+justin
+honda
+ping
+messenger
+##rap
+v9
+543
+##とは
+unity
+appqq
+はすへて
+025
+leo
+##tone
+##テ
+##ass
+uniqlo
+##010
+502
+her
+jane
+memory
+moneydj
+##tical
+human
+12306
+していると
+##m2
+coc
+miacare
+##mn
+tmt
+##core
+vim
+kk
+##may
+fan
+target
+use
+too
+338
+435
+2050
+867
+737
+fast
+##2c
+services
+##ope
+omega
+energy
+##わ
+pinkoi
+1a
+##なから
+##rain
+jackson
+##ement
+##シャンルの
+374
+366
+そんな
+p9
+rd
+##ᆨ
+1111
+##tier
+##vic
+zone
+##│
+385
+690
+dl
+isofix
+cpa
+m4
+322
+kimi
+めて
+davis
+##lay
+lulu
+##uck
+050
+weeks
+qs
+##hop
+920
+##n
+ae
+##ear
+~5
+eia
+405
+##fly
+korea
+jpeg
+boost
+##ship
+small
+##リア
+1860
+eur
+297
+425
+valley
+##iel
+simple
+##ude
+rn
+k2
+##ena
+されます
+non
+patrick
+しているから
+##ナー
+feed
+5757
+30g
+process
+well
+qqmei
+##thing
+they
+aws
+lu
+pink
+##ters
+##kin
+または
+board
+##vertisement
+wine
+##ien
+unicode
+##dge
+r1
+359
+##tant
+いを
+##twitter
+##3c
+cool1
+される
+##れて
+##l
+isp
+##012
+standard
+45㎡2
+402
+##150
+matt
+##fu
+326
+##iner
+googlemsn
+pixnetfacebookyahoo
+##ラン
+x7
+886
+##uce
+メーカー
+sao
+##ev
+##きました
+##file
+9678
+403
+xddd
+shirt
+6l
+##rio
+##hat
+3mm
+givenchy
+ya
+bang
+##lio
+monday
+crystal
+ロクイン
+##abc
+336
+head
+890
+ubuntuforumwikilinuxpastechat
+##vc
+##~20
+##rity
+cnc
+7866
+ipv6
+null
+1897
+##ost
+yang
+imsean
+tiger
+##fet
+##ンス
+352
+##=
+dji
+327
+ji
+maria
+##come
+##んて
+foundation
+3100
+##beth
+##なった
+1m
+601
+active
+##aft
+##don
+3p
+sr
+349
+emma
+##khz
+living
+415
+353
+1889
+341
+709
+457
+sas
+x6
+##face
+pptv
+x4
+##mate
+han
+sophie
+##jing
+337
+fifa
+##mand
+other
+sale
+inwedding
+##gn
+てきちゃいます
+##mmy
+##pmlast
+bad
+nana
+nbc
+してみてくたさいね
+なとはお
+##wu
+##かあります
+##あ
+note7
+single
+##340
+せからこ
+してくたさい♪この
+しにはとんとんワークケートを
+するとあなたにもっとマッチした
+ならワークケートへ
+もみつかっちゃうかも
+ワークケートの
+##bel
+window
+##dio
+##ht
+union
+age
+382
+14
+##ivity
+##y
+コメント
+domain
+neo
+##isa
+##lter
+5k
+f5
+steven
+##cts
+powerpoint
+tft
+self
+g2
+ft
+##テル
+zol
+##act
+mwc
+381
+343
+もう
+nbapop
+408
+てある
+eds
+ace
+##room
+previous
+author
+tomtom
+il
+##ets
+hu
+financial
+☆☆☆
+っています
+bp
+5t
+chi
+1gb
+##hg
+fairmont
+cross
+008
+gay
+h2
+function
+##けて
+356
+also
+1b
+625
+##ータ
+##raph
+1894
+3~5
+##ils
+i3
+334
+avenue
+##host
+による
+##bon
+##tsu
+message
+navigation
+50g
+fintech
+h6
+##ことを
+8cm
+##ject
+##vas
+##firm
+credit
+##wf
+xxxx
+form
+##nor
+##space
+huawei
+plan
+json
+sbl
+##dc
+machine
+921
+392
+wish
+##120
+##sol
+windows7
+edward
+##ために
+development
+washington
+##nsis
+lo
+818
+##sio
+##ym
+##bor
+planet
+##~8
+##wt
+ieee
+gpa
+##めて
+camp
+ann
+gm
+##tw
+##oka
+connect
+##rss
+##work
+##atus
+wall
+chicken
+soul
+2mm
+##times
+fa
+##ather
+##cord
+009
+##eep
+hitachi
+gui
+harry
+##pan
+e1
+disney
+##press
+##ーション
+wind
+386
+frigidaire
+##tl
+liu
+hsu
+332
+basic
+von
+ev
+いた
+てきる
+スホンサーサイト
+learning
+##ull
+expedia
+archives
+change
+##wei
+santa
+cut
+ins
+6gb
+turbo
+brand
+cf1
+508
+004
+return
+747
+##rip
+h1
+##nis
+##をこ
+128gb
+##にお
+3t
+application
+しており
+emc
+rx
+##oon
+384
+quick
+412
+15058
+wilson
+wing
+chapter
+##bug
+beyond
+##cms
+##dar
+##oh
+zoom
+e2
+trip
+sb
+##nba
+rcep
+342
+aspx
+ci
+080
+gc
+gnu
+める
+##count
+advanced
+dance
+dv
+##url
+##ging
+367
+8591
+am09
+shadow
+battle
+346
+##i
+##cia
+##という
+emily
+##のてす
+##tation
+host
+ff
+techorz
+sars
+##mini
+##mporary
+##ering
+nc
+4200
+798
+##next
+cma
+##mbps
+##gas
+##ift
+##dot
+##ィ
+455
+##~17
+amana
+##りの
+426
+##ros
+ir
+00㎡1
+##eet
+##ible
+##↓
+710
+ˋ▽ˊ
+##aka
+dcs
+iq
+##v
+l1
+##lor
+maggie
+##011
+##iu
+588
+##~1
+830
+##gt
+1tb
+articles
+create
+##burg
+##iki
+database
+fantasy
+##rex
+##cam
+dlc
+dean
+##you
+hard
+path
+gaming
+victoria
+maps
+cb
+##lee
+##itor
+overchicstoretvhome
+systems
+##xt
+416
+p3
+sarah
+760
+##nan
+407
+486
+x9
+install
+second
+626
+##ann
+##ph
+##rcle
+##nic
+860
+##nar
+ec
+##とう
+768
+metro
+chocolate
+##rian
+~4
+##table
+##しています
+skin
+##sn
+395
+mountain
+##0mm
+inparadise
+6m
+7x24
+ib
+4800
+##jia
+eeworld
+creative
+g5
+g3
+357
+parker
+ecfa
+village
+からの
+18000
+sylvia
+サーヒス
+hbl
+##ques
+##onsored
+##x2
+##きます
+##v4
+##tein
+ie6
+383
+##stack
+389
+ver
+##ads
+##baby
+sound
+bbe
+##110
+##lone
+##uid
+ads
+022
+gundam
+351
+thinkpad
+006
+scrum
+match
+##ave
+mems
+##470
+##oy
+##なりました
+##talk
+glass
+lamigo
+span
+##eme
+job
+##a5
+jay
+wade
+kde
+498
+##lace
+ocean
+tvg
+##covery
+##r3
+##ners
+##rea
+junior
+think
+##aine
+cover
+##ision
+##sia
+↓↓
+##bow
+msi
+413
+458
+406
+##love
+711
+801
+soft
+z2
+##pl
+456
+1840
+mobil
+mind
+##uy
+427
+nginx
+##oi
+めた
+##rr
+6221
+##mple
+##sson
+##ーシてす
+371
+##nts
+91tv
+comhd
+crv3000
+##uard
+1868
+397
+deep
+lost
+field
+gallery
+##bia
+rate
+spf
+redis
+traction
+930
+icloud
+011
+なら
+fe
+jose
+372
+##tory
+into
+sohu
+fx
+899
+379
+kicstart2
+##hia
+すく
+##~3
+##sit
+ra
+24
+##walk
+##xure
+500g
+##pact
+pacific
+xa
+natural
+carlo
+##250
+##walker
+1850
+##can
+cto
+gigi
+516
+##サー
+pen
+##hoo
+ob
+matlab
+##b
+##yy
+13913459
+##iti
+mango
+##bbs
+sense
+c5
+oxford
+##ニア
+walker
+jennifer
+##ola
+course
+##bre
+701
+##pus
+##rder
+lucky
+075
+##ぁ
+ivy
+なお
+##nia
+sotheby
+side
+##ugh
+joy
+##orage
+##ush
+##bat
+##dt
+364
+r9
+##2d
+##gio
+511
+country
+wear
+##lax
+##~7
+##moon
+393
+seven
+study
+411
+348
+lonzo
+8k
+##ェ
+evolution
+##イフ
+##kk
+gs
+kd
+##レス
+arduino
+344
+b12
+##lux
+arpg
+##rdon
+cook
+##x5
+dark
+five
+##als
+##ida
+とても
+sign
+362
+##ちの
+something
+20mm
+##nda
+387
+##posted
+fresh
+tf
+1870
+422
+cam
+##mine
+##skip
+##form
+##ssion
+education
+394
+##tee
+dyson
+stage
+##jie
+want
+##night
+epson
+pack
+あります
+##ppy
+テリヘル
+##█
+wd
+##eh
+##rence
+left
+##lvin
+golden
+mhz
+discovery
+##trix
+##n2
+loft
+##uch
+##dra
+##sse
+speed
+~1
+1mdb
+sorry
+welcome
+##urn
+wave
+gaga
+##lmer
+teddy
+##160
+トラックハック
+せよ
+611
+##f2016
+378
+rp
+##sha
+rar
+##あなたに
+##きた
+840
+holiday
+##ュー
+373
+074
+##vg
+##nos
+##rail
+gartner
+gi
+6p
+##dium
+kit
+488
+b3
+eco
+##ろう
+20g
+sean
+##stone
+autocad
+nu
+##np
+f16
+write
+029
+m5
+##ias
+images
+atp
+##dk
+fsm
+504
+1350
+ve
+52kb
+##xxx
+##のに
+##cake
+414
+unit
+lim
+ru
+1v
+##ification
+published
+angela
+16g
+analytics
+ak
+##q
+##nel
+gmt
+##icon
+again
+##₂
+##bby
+ios11
+445
+かこさいます
+waze
+いてす
+##ハ
+9985
+##ust
+##ティー
+framework
+##007
+iptv
+delete
+52sykb
+cl
+wwdc
+027
+30cm
+##fw
+##ての
+1389
+##xon
+brandt
+##ses
+##dragon
+tc
+vetements
+anne
+monte
+modern
+official
+##へて
+##ere
+##nne
+##oud
+もちろん
+50
+etnews
+##a2
+##graphy
+421
+863
+##ちゃん
+444
+##rtex
+##てお
+l2
+##gma
+mount
+ccd
+たと
+archive
+morning
+tan
+ddos
+e7
+##ホ
+day4
+##ウ
+gis
+453
+its
+495
+factory
+bruce
+pg
+##ito
+ってくたさい
+guest
+cdma
+##lling
+536
+n3
+しかし
+3~4
+mega
+eyes
+ro
+13
+women
+dac
+church
+##jun
+singapore
+##facebook
+6991
+starbucks
+##tos
+##stin
+##shine
+zen
+##mu
+tina
+20℃
+1893
+##たけて
+503
+465
+request
+##gence
+qt
+##っ
+1886
+347
+363
+q7
+##zzi
+diary
+##tore
+409
+##ead
+468
+cst
+##osa
+canada
+agent
+va
+##jiang
+##ちは
+##ーク
+##lam
+sg
+##nix
+##sday
+##よって
+g6
+##master
+bing
+##zl
+charlie
+16
+8mm
+nb40
+##ーン
+thai
+##ルフ
+ln284ct
+##itz
+##2f
+bonnie
+##food
+##lent
+originals
+##stro
+##lts
+418
+∟∣
+##bscribe
+children
+ntd
+yesstyle
+##かも
+hmv
+##tment
+d5
+2cm
+arts
+sms
+##pn
+##я
+##いい
+topios9
+539
+lifestyle
+virtual
+##ague
+xz
+##deo
+muji
+024
+unt
+##nnis
+##ᅩ
+faq1
+1884
+396
+##ette
+fly
+64㎡
+はしめまして
+441
+curry
+##pop
+のこ
+release
+##←
+##◆◆
+##cast
+073
+ありな
+500ml
+##ews
+5c
+##stle
+ios7
+##ima
+787
+dog
+lenovo
+##r4
+roger
+013
+cbs
+vornado
+100m
+417
+##desk
+##クok
+##ald
+1867
+9595
+2900
+##van
+oil
+##x
+some
+break
+common
+##jy
+##lines
+g7
+twice
+419
+ella
+nano
+belle
+にこ
+##mes
+##self
+##note
+jb
+##ことかてきます
+benz
+##との
+##ova
+451
+save
+##wing
+##ますのて
+kai
+りは
+##hua
+##rect
+rainer
+##unge
+448
+##0m
+adsl
+##かな
+guestname
+##uma
+##kins
+##zu
+tokichoi
+##price
+county
+##med
+##mus
+rmk
+391
+address
+vm
+えて
+openload
+##group
+##hin
+##iginal
+amg
+urban
+##oz
+jobs
+emi
+##public
+beautiful
+##sch
+album
+##dden
+##bell
+jerry
+works
+hostel
+miller
+##drive
+##rmin
+##10
+376
+boot
+828
+##370
+##fx
+##cm~
+1885
+##nome
+##ctionary
+##oman
+##lish
+##cr
+##hm
+433
+##how
+432
+francis
+xi
+c919
+b5
+evernote
+##uc
+vga
+##3000
+coupe
+##urg
+##cca
+##uality
+019
+6g
+れる
+multi
+##また
+##ett
+em
+hey
+##ani
+##tax
+##rma
+inside
+than
+740
+leonnhurt
+##jin
+ict
+れた
+bird
+notes
+200mm
+くの
+##dical
+##lli
+result
+442
+iu
+ee
+438
+smap
+gopro
+##last
+yin
+pure
+998
+32g
+けた
+5kg
+##dan
+##rame
+mama
+##oot
+bean
+marketing
+##hur
+2l
+bella
+sync
+xuite
+##ground
+515
+discuz
+##getrelax
+##ince
+##bay
+##5s
+cj
+##イス
+gmat
+apt
+##pass
+jing
+##rix
+c4
+rich
+##とても
+niusnews
+##ello
+bag
+770
+##eting
+##mobile
+18
+culture
+015
+##のてすか
+377
+1020
+area
+##ience
+616
+details
+gp
+universal
+silver
+dit
+はお
+private
+ddd
+u11
+kanshu
+##ified
+fung
+##nny
+dx
+##520
+tai
+475
+023
+##fr
+##lean
+3s
+##pin
+429
+##rin
+25000
+ly
+rick
+##bility
+usb3
+banner
+##baru
+##gion
+metal
+dt
+vdf
+1871
+karl
+qualcomm
+bear
+1010
+oldid
+ian
+jo
+##tors
+population
+##ernel
+1882
+mmorpg
+##mv
+##bike
+603
+##©
+ww
+friend
+##ager
+exhibition
+##del
+##pods
+fpx
+structure
+##free
+##tings
+kl
+##rley
+##copyright
+##mma
+california
+3400
+orange
+yoga
+4l
+canmake
+honey
+##anda
+##コメント
+595
+nikkie
+##ルハイト
+dhl
+publishing
+##mall
+##gnet
+20cm
+513
+##クセス
+##┅
+e88
+970
+##dog
+fishbase
+##!
+##"
+###
+##$
+##%
+##&
+##'
+##(
+##)
+##*
+##+
+##,
+##-
+##.
+##/
+##:
+##;
+##<
+##=
+##>
+##?
+##@
+##[
+##\
+##]
+##^
+##_
+##{
+##|
+##}
+##~
+##£
+##¤
+##¥
+##§
+##«
+##±
+##³
+##µ
+##·
+##¹
+##º
+##»
+##¼
+##ß
+##æ
+##÷
+##ø
+##đ
+##ŋ
+##ɔ
+##ə
+##ɡ
+##ʰ
+##ˇ
+##ˈ
+##ˊ
+##ˋ
+##ˍ
+##ː
+##˙
+##˚
+##ˢ
+##α
+##β
+##γ
+##δ
+##ε
+##η
+##θ
+##ι
+##κ
+##λ
+##μ
+##ν
+##ο
+##π
+##ρ
+##ς
+##σ
+##τ
+##υ
+##φ
+##χ
+##ψ
+##б
+##в
+##г
+##д
+##е
+##ж
+##з
+##к
+##л
+##м
+##н
+##о
+##п
+##р
+##с
+##т
+##у
+##ф
+##х
+##ц
+##ч
+##ш
+##ы
+##ь
+##і
+##ا
+##ب
+##ة
+##ت
+##د
+##ر
+##س
+##ع
+##ل
+##م
+##ن
+##ه
+##و
+##ي
+##۩
+##ก
+##ง
+##น
+##ม
+##ย
+##ร
+##อ
+##า
+##เ
+##๑
+##་
+##ღ
+##ᄀ
+##ᄁ
+##ᄂ
+##ᄃ
+##ᄅ
+##ᄆ
+##ᄇ
+##ᄈ
+##ᄉ
+##ᄋ
+##ᄌ
+##ᄎ
+##ᄏ
+##ᄐ
+##ᄑ
+##ᄒ
+##ᅢ
+##ᅣ
+##ᅥ
+##ᅦ
+##ᅧ
+##ᅨ
+##ᅪ
+##ᅬ
+##ᅭ
+##ᅮ
+##ᅯ
+##ᅲ
+##ᅳ
+##ᅴ
+##ᆷ
+##ᆸ
+##ᆺ
+##ᆻ
+##ᗜ
+##ᵃ
+##ᵉ
+##ᵍ
+##ᵏ
+##ᵐ
+##ᵒ
+##ᵘ
+##‖
+##„
+##†
+##•
+##‥
+##‧
+##
+##‰
+##′
+##″
+##‹
+##›
+##※
+##‿
+##⁄
+##ⁱ
+##⁺
+##ⁿ
+##₁
+##₃
+##₄
+##€
+##№
+##ⅰ
+##ⅱ
+##ⅲ
+##ⅳ
+##ⅴ
+##↔
+##↗
+##↘
+##⇒
+##∀
+##−
+##∕
+##∙
+##√
+##∞
+##∟
+##∠
+##∣
+##∩
+##∮
+##∶
+##∼
+##∽
+##≈
+##≒
+##≡
+##≤
+##≥
+##≦
+##≧
+##≪
+##≫
+##⊙
+##⋅
+##⋈
+##⋯
+##⌒
+##①
+##②
+##③
+##④
+##⑤
+##⑥
+##⑦
+##⑧
+##⑨
+##⑩
+##⑴
+##⑵
+##⑶
+##⑷
+##⑸
+##⒈
+##⒉
+##⒊
+##⒋
+##ⓒ
+##ⓔ
+##ⓘ
+##━
+##┃
+##┆
+##┊
+##┌
+##└
+##├
+##┣
+##═
+##║
+##╚
+##╞
+##╠
+##╭
+##╮
+##╯
+##╰
+##╱
+##╳
+##▂
+##▃
+##▅
+##▇
+##▉
+##▋
+##▌
+##▍
+##▎
+##□
+##▪
+##▫
+##▬
+##△
+##▶
+##►
+##▽
+##◇
+##◕
+##◠
+##◢
+##◤
+##☀
+##☕
+##☞
+##☺
+##☼
+##♀
+##♂
+##♠
+##♡
+##♣
+##♦
+##♫
+##♬
+##✈
+##✔
+##✕
+##✖
+##✦
+##✨
+##✪
+##✰
+##✿
+##❀
+##➜
+##➤
+##⦿
+##、
+##。
+##〃
+##々
+##〇
+##〈
+##〉
+##《
+##》
+##「
+##」
+##『
+##』
+##【
+##】
+##〓
+##〔
+##〕
+##〖
+##〗
+##〜
+##〝
+##〞
+##ぃ
+##ぇ
+##ぬ
+##ふ
+##ほ
+##む
+##ゃ
+##ゅ
+##ゆ
+##ょ
+##゜
+##ゝ
+##ァ
+##ゥ
+##エ
+##ォ
+##ケ
+##サ
+##セ
+##ソ
+##ッ
+##ニ
+##ヌ
+##ネ
+##ノ
+##ヘ
+##モ
+##ャ
+##ヤ
+##ュ
+##ユ
+##ョ
+##ヨ
+##ワ
+##ヲ
+##・
+##ヽ
+##ㄅ
+##ㄆ
+##ㄇ
+##ㄉ
+##ㄋ
+##ㄌ
+##ㄍ
+##ㄎ
+##ㄏ
+##ㄒ
+##ㄚ
+##ㄛ
+##ㄞ
+##ㄟ
+##ㄢ
+##ㄤ
+##ㄥ
+##ㄧ
+##ㄨ
+##ㆍ
+##㈦
+##㊣
+##㗎
+##一
+##丁
+##七
+##万
+##丈
+##三
+##上
+##下
+##不
+##与
+##丐
+##丑
+##专
+##且
+##丕
+##世
+##丘
+##丙
+##业
+##丛
+##东
+##丝
+##丞
+##丟
+##両
+##丢
+##两
+##严
+##並
+##丧
+##丨
+##个
+##丫
+##中
+##丰
+##串
+##临
+##丶
+##丸
+##丹
+##为
+##主
+##丼
+##丽
+##举
+##丿
+##乂
+##乃
+##久
+##么
+##义
+##之
+##乌
+##乍
+##乎
+##乏
+##乐
+##乒
+##乓
+##乔
+##乖
+##乗
+##乘
+##乙
+##乜
+##九
+##乞
+##也
+##习
+##乡
+##书
+##乩
+##买
+##乱
+##乳
+##乾
+##亀
+##亂
+##了
+##予
+##争
+##事
+##二
+##于
+##亏
+##云
+##互
+##五
+##井
+##亘
+##亙
+##亚
+##些
+##亜
+##亞
+##亟
+##亡
+##亢
+##交
+##亥
+##亦
+##产
+##亨
+##亩
+##享
+##京
+##亭
+##亮
+##亲
+##亳
+##亵
+##人
+##亿
+##什
+##仁
+##仃
+##仄
+##仅
+##仆
+##仇
+##今
+##介
+##仍
+##从
+##仏
+##仑
+##仓
+##仔
+##仕
+##他
+##仗
+##付
+##仙
+##仝
+##仞
+##仟
+##代
+##令
+##以
+##仨
+##仪
+##们
+##仮
+##仰
+##仲
+##件
+##价
+##任
+##份
+##仿
+##企
+##伉
+##伊
+##伍
+##伎
+##伏
+##伐
+##休
+##伕
+##众
+##优
+##伙
+##会
+##伝
+##伞
+##伟
+##传
+##伢
+##伤
+##伦
+##伪
+##伫
+##伯
+##估
+##伴
+##伶
+##伸
+##伺
+##似
+##伽
+##佃
+##但
+##佇
+##佈
+##位
+##低
+##住
+##佐
+##佑
+##体
+##佔
+##何
+##佗
+##佘
+##余
+##佚
+##佛
+##作
+##佝
+##佞
+##佟
+##你
+##佢
+##佣
+##佤
+##佥
+##佩
+##佬
+##佯
+##佰
+##佳
+##併
+##佶
+##佻
+##佼
+##使
+##侃
+##侄
+##來
+##侈
+##例
+##侍
+##侏
+##侑
+##侖
+##侗
+##供
+##依
+##侠
+##価
+##侣
+##侥
+##侦
+##侧
+##侨
+##侬
+##侮
+##侯
+##侵
+##侶
+##侷
+##便
+##係
+##促
+##俄
+##俊
+##俎
+##俏
+##俐
+##俑
+##俗
+##俘
+##俚
+##保
+##俞
+##俟
+##俠
+##信
+##俨
+##俩
+##俪
+##俬
+##俭
+##修
+##俯
+##俱
+##俳
+##俸
+##俺
+##俾
+##倆
+##倉
+##個
+##倌
+##倍
+##倏
+##們
+##倒
+##倔
+##倖
+##倘
+##候
+##倚
+##倜
+##借
+##倡
+##値
+##倦
+##倩
+##倪
+##倫
+##倬
+##倭
+##倶
+##债
+##值
+##倾
+##偃
+##假
+##偈
+##偉
+##偌
+##偎
+##偏
+##偕
+##做
+##停
+##健
+##側
+##偵
+##偶
+##偷
+##偻
+##偽
+##偿
+##傀
+##傅
+##傍
+##傑
+##傘
+##備
+##傚
+##傢
+##傣
+##傥
+##储
+##傩
+##催
+##傭
+##傲
+##傳
+##債
+##傷
+##傻
+##傾
+##僅
+##働
+##像
+##僑
+##僕
+##僖
+##僚
+##僥
+##僧
+##僭
+##僮
+##僱
+##僵
+##價
+##僻
+##儀
+##儂
+##億
+##儆
+##儉
+##儋
+##儒
+##儕
+##儘
+##償
+##儡
+##優
+##儲
+##儷
+##儼
+##儿
+##兀
+##允
+##元
+##兄
+##充
+##兆
+##兇
+##先
+##光
+##克
+##兌
+##免
+##児
+##兑
+##兒
+##兔
+##兖
+##党
+##兜
+##兢
+##入
+##內
+##全
+##兩
+##八
+##公
+##六
+##兮
+##兰
+##共
+##兲
+##关
+##兴
+##兵
+##其
+##具
+##典
+##兹
+##养
+##兼
+##兽
+##冀
+##内
+##円
+##冇
+##冈
+##冉
+##冊
+##册
+##再
+##冏
+##冒
+##冕
+##冗
+##写
+##军
+##农
+##冠
+##冢
+##冤
+##冥
+##冨
+##冪
+##冬
+##冯
+##冰
+##冲
+##决
+##况
+##冶
+##冷
+##冻
+##冼
+##冽
+##冾
+##净
+##凄
+##准
+##凇
+##凈
+##凉
+##凋
+##凌
+##凍
+##减
+##凑
+##凛
+##凜
+##凝
+##几
+##凡
+##凤
+##処
+##凪
+##凭
+##凯
+##凰
+##凱
+##凳
+##凶
+##凸
+##凹
+##出
+##击
+##函
+##凿
+##刀
+##刁
+##刃
+##分
+##切
+##刈
+##刊
+##刍
+##刎
+##刑
+##划
+##列
+##刘
+##则
+##刚
+##创
+##初
+##删
+##判
+##別
+##刨
+##利
+##刪
+##别
+##刮
+##到
+##制
+##刷
+##券
+##刹
+##刺
+##刻
+##刽
+##剁
+##剂
+##剃
+##則
+##剉
+##削
+##剋
+##剌
+##前
+##剎
+##剐
+##剑
+##剔
+##剖
+##剛
+##剜
+##剝
+##剣
+##剤
+##剥
+##剧
+##剩
+##剪
+##副
+##割
+##創
+##剷
+##剽
+##剿
+##劃
+##劇
+##劈
+##劉
+##劊
+##劍
+##劏
+##劑
+##力
+##劝
+##办
+##功
+##加
+##务
+##劣
+##动
+##助
+##努
+##劫
+##劭
+##励
+##劲
+##劳
+##労
+##劵
+##効
+##劾
+##势
+##勁
+##勃
+##勇
+##勉
+##勋
+##勐
+##勒
+##動
+##勖
+##勘
+##務
+##勛
+##勝
+##勞
+##募
+##勢
+##勤
+##勧
+##勳
+##勵
+##勸
+##勺
+##勻
+##勾
+##勿
+##匀
+##包
+##匆
+##匈
+##匍
+##匐
+##匕
+##化
+##北
+##匙
+##匝
+##匠
+##匡
+##匣
+##匪
+##匮
+##匯
+##匱
+##匹
+##区
+##医
+##匾
+##匿
+##區
+##十
+##千
+##卅
+##升
+##午
+##卉
+##半
+##卍
+##华
+##协
+##卑
+##卒
+##卓
+##協
+##单
+##卖
+##南
+##単
+##博
+##卜
+##卞
+##卟
+##占
+##卡
+##卢
+##卤
+##卦
+##卧
+##卫
+##卮
+##卯
+##印
+##危
+##即
+##却
+##卵
+##卷
+##卸
+##卻
+##卿
+##厂
+##厄
+##厅
+##历
+##厉
+##压
+##厌
+##厕
+##厘
+##厚
+##厝
+##原
+##厢
+##厥
+##厦
+##厨
+##厩
+##厭
+##厮
+##厲
+##厳
+##去
+##县
+##叁
+##参
+##參
+##又
+##叉
+##及
+##友
+##双
+##反
+##収
+##发
+##叔
+##取
+##受
+##变
+##叙
+##叛
+##叟
+##叠
+##叡
+##叢
+##口
+##古
+##句
+##另
+##叨
+##叩
+##只
+##叫
+##召
+##叭
+##叮
+##可
+##台
+##叱
+##史
+##右
+##叵
+##叶
+##号
+##司
+##叹
+##叻
+##叼
+##叽
+##吁
+##吃
+##各
+##吆
+##合
+##吉
+##吊
+##吋
+##同
+##名
+##后
+##吏
+##吐
+##向
+##吒
+##吓
+##吕
+##吖
+##吗
+##君
+##吝
+##吞
+##吟
+##吠
+##吡
+##否
+##吧
+##吨
+##吩
+##含
+##听
+##吭
+##吮
+##启
+##吱
+##吳
+##吴
+##吵
+##吶
+##吸
+##吹
+##吻
+##吼
+##吽
+##吾
+##呀
+##呂
+##呃
+##呆
+##呈
+##告
+##呋
+##呎
+##呐
+##呓
+##呕
+##呗
+##员
+##呛
+##呜
+##呢
+##呤
+##呦
+##周
+##呱
+##呲
+##味
+##呵
+##呷
+##呸
+##呻
+##呼
+##命
+##咀
+##咁
+##咂
+##咄
+##咆
+##咋
+##和
+##咎
+##咏
+##咐
+##咒
+##咔
+##咕
+##咖
+##咗
+##咘
+##咙
+##咚
+##咛
+##咣
+##咤
+##咦
+##咧
+##咨
+##咩
+##咪
+##咫
+##咬
+##咭
+##咯
+##咱
+##咲
+##咳
+##咸
+##咻
+##咽
+##咿
+##哀
+##品
+##哂
+##哄
+##哆
+##哇
+##哈
+##哉
+##哋
+##哌
+##响
+##哎
+##哏
+##哐
+##哑
+##哒
+##哔
+##哗
+##哟
+##員
+##哥
+##哦
+##哧
+##哨
+##哩
+##哪
+##哭
+##哮
+##哲
+##哺
+##哼
+##哽
+##唁
+##唄
+##唆
+##唇
+##唉
+##唏
+##唐
+##唑
+##唔
+##唠
+##唤
+##唧
+##唬
+##售
+##唯
+##唰
+##唱
+##唳
+##唷
+##唸
+##唾
+##啃
+##啄
+##商
+##啉
+##啊
+##問
+##啓
+##啕
+##啖
+##啜
+##啞
+##啟
+##啡
+##啤
+##啥
+##啦
+##啧
+##啪
+##啫
+##啬
+##啮
+##啰
+##啱
+##啲
+##啵
+##啶
+##啷
+##啸
+##啻
+##啼
+##啾
+##喀
+##喂
+##喃
+##善
+##喆
+##喇
+##喉
+##喊
+##喋
+##喎
+##喏
+##喔
+##喘
+##喙
+##喚
+##喜
+##喝
+##喟
+##喧
+##喪
+##喫
+##喬
+##單
+##喰
+##喱
+##喲
+##喳
+##喵
+##営
+##喷
+##喹
+##喺
+##喻
+##喽
+##嗅
+##嗆
+##嗇
+##嗎
+##嗑
+##嗒
+##嗓
+##嗔
+##嗖
+##嗚
+##嗜
+##嗝
+##嗟
+##嗡
+##嗣
+##嗤
+##嗦
+##嗨
+##嗪
+##嗬
+##嗯
+##嗰
+##嗲
+##嗳
+##嗶
+##嗷
+##嗽
+##嘀
+##嘅
+##嘆
+##嘈
+##嘉
+##嘌
+##嘍
+##嘎
+##嘔
+##嘖
+##嘗
+##嘘
+##嘚
+##嘛
+##嘜
+##嘞
+##嘟
+##嘢
+##嘣
+##嘤
+##嘧
+##嘩
+##嘭
+##嘮
+##嘯
+##嘰
+##嘱
+##嘲
+##嘴
+##嘶
+##嘸
+##嘹
+##嘻
+##嘿
+##噁
+##噌
+##噎
+##噓
+##噔
+##噗
+##噙
+##噜
+##噠
+##噢
+##噤
+##器
+##噩
+##噪
+##噬
+##噱
+##噴
+##噶
+##噸
+##噹
+##噻
+##噼
+##嚀
+##嚇
+##嚎
+##嚏
+##嚐
+##嚓
+##嚕
+##嚟
+##嚣
+##嚥
+##嚨
+##嚮
+##嚴
+##嚷
+##嚼
+##囂
+##囉
+##囊
+##囍
+##囑
+##囔
+##囗
+##囚
+##四
+##囝
+##回
+##囟
+##因
+##囡
+##团
+##団
+##囤
+##囧
+##囪
+##囫
+##园
+##困
+##囱
+##囲
+##図
+##围
+##囹
+##固
+##国
+##图
+##囿
+##圃
+##圄
+##圆
+##圈
+##國
+##圍
+##圏
+##園
+##圓
+##圖
+##團
+##圜
+##土
+##圣
+##圧
+##在
+##圩
+##圭
+##地
+##圳
+##场
+##圻
+##圾
+##址
+##坂
+##均
+##坊
+##坍
+##坎
+##坏
+##坐
+##坑
+##块
+##坚
+##坛
+##坝
+##坞
+##坟
+##坠
+##坡
+##坤
+##坦
+##坨
+##坪
+##坯
+##坳
+##坵
+##坷
+##垂
+##垃
+##垄
+##型
+##垒
+##垚
+##垛
+##垠
+##垢
+##垣
+##垦
+##垩
+##垫
+##垭
+##垮
+##垵
+##埂
+##埃
+##埋
+##城
+##埔
+##埕
+##埗
+##域
+##埠
+##埤
+##埵
+##執
+##埸
+##培
+##基
+##埼
+##堀
+##堂
+##堃
+##堅
+##堆
+##堇
+##堑
+##堕
+##堙
+##堡
+##堤
+##堪
+##堯
+##堰
+##報
+##場
+##堵
+##堺
+##堿
+##塊
+##塌
+##塑
+##塔
+##塗
+##塘
+##塚
+##塞
+##塢
+##塩
+##填
+##塬
+##塭
+##塵
+##塾
+##墀
+##境
+##墅
+##墉
+##墊
+##墒
+##墓
+##増
+##墘
+##墙
+##墜
+##增
+##墟
+##墨
+##墩
+##墮
+##墳
+##墻
+##墾
+##壁
+##壅
+##壆
+##壇
+##壊
+##壑
+##壓
+##壕
+##壘
+##壞
+##壟
+##壢
+##壤
+##壩
+##士
+##壬
+##壮
+##壯
+##声
+##売
+##壳
+##壶
+##壹
+##壺
+##壽
+##处
+##备
+##変
+##复
+##夏
+##夔
+##夕
+##外
+##夙
+##多
+##夜
+##够
+##夠
+##夢
+##夥
+##大
+##天
+##太
+##夫
+##夭
+##央
+##夯
+##失
+##头
+##夷
+##夸
+##夹
+##夺
+##夾
+##奂
+##奄
+##奇
+##奈
+##奉
+##奋
+##奎
+##奏
+##奐
+##契
+##奔
+##奕
+##奖
+##套
+##奘
+##奚
+##奠
+##奢
+##奥
+##奧
+##奪
+##奬
+##奮
+##女
+##奴
+##奶
+##奸
+##她
+##好
+##如
+##妃
+##妄
+##妆
+##妇
+##妈
+##妊
+##妍
+##妒
+##妓
+##妖
+##妘
+##妙
+##妝
+##妞
+##妣
+##妤
+##妥
+##妨
+##妩
+##妪
+##妮
+##妲
+##妳
+##妹
+##妻
+##妾
+##姆
+##姉
+##姊
+##始
+##姍
+##姐
+##姑
+##姒
+##姓
+##委
+##姗
+##姚
+##姜
+##姝
+##姣
+##姥
+##姦
+##姨
+##姪
+##姫
+##姬
+##姹
+##姻
+##姿
+##威
+##娃
+##娄
+##娅
+##娆
+##娇
+##娉
+##娑
+##娓
+##娘
+##娛
+##娜
+##娟
+##娠
+##娣
+##娥
+##娩
+##娱
+##娲
+##娴
+##娶
+##娼
+##婀
+##婁
+##婆
+##婉
+##婊
+##婕
+##婚
+##婢
+##婦
+##婧
+##婪
+##婭
+##婴
+##婵
+##婶
+##婷
+##婺
+##婿
+##媒
+##媚
+##媛
+##媞
+##媧
+##媲
+##媳
+##媽
+##媾
+##嫁
+##嫂
+##嫉
+##嫌
+##嫑
+##嫔
+##嫖
+##嫘
+##嫚
+##嫡
+##嫣
+##嫦
+##嫩
+##嫲
+##嫵
+##嫻
+##嬅
+##嬉
+##嬌
+##嬗
+##嬛
+##嬢
+##嬤
+##嬪
+##嬰
+##嬴
+##嬷
+##嬸
+##嬿
+##孀
+##孃
+##子
+##孑
+##孔
+##孕
+##孖
+##字
+##存
+##孙
+##孚
+##孛
+##孜
+##孝
+##孟
+##孢
+##季
+##孤
+##学
+##孩
+##孪
+##孫
+##孬
+##孰
+##孱
+##孳
+##孵
+##學
+##孺
+##孽
+##孿
+##宁
+##它
+##宅
+##宇
+##守
+##安
+##宋
+##完
+##宏
+##宓
+##宕
+##宗
+##官
+##宙
+##定
+##宛
+##宜
+##宝
+##实
+##実
+##宠
+##审
+##客
+##宣
+##室
+##宥
+##宦
+##宪
+##宫
+##宮
+##宰
+##害
+##宴
+##宵
+##家
+##宸
+##容
+##宽
+##宾
+##宿
+##寂
+##寄
+##寅
+##密
+##寇
+##富
+##寐
+##寒
+##寓
+##寛
+##寝
+##寞
+##察
+##寡
+##寢
+##寥
+##實
+##寧
+##寨
+##審
+##寫
+##寬
+##寮
+##寰
+##寵
+##寶
+##寸
+##对
+##寺
+##寻
+##导
+##対
+##寿
+##封
+##専
+##射
+##将
+##將
+##專
+##尉
+##尊
+##尋
+##對
+##導
+##小
+##少
+##尔
+##尕
+##尖
+##尘
+##尚
+##尝
+##尤
+##尧
+##尬
+##就
+##尴
+##尷
+##尸
+##尹
+##尺
+##尻
+##尼
+##尽
+##尾
+##尿
+##局
+##屁
+##层
+##屄
+##居
+##屆
+##屈
+##屉
+##届
+##屋
+##屌
+##屍
+##屎
+##屏
+##屐
+##屑
+##展
+##屜
+##属
+##屠
+##屡
+##屢
+##層
+##履
+##屬
+##屯
+##山
+##屹
+##屿
+##岀
+##岁
+##岂
+##岌
+##岐
+##岑
+##岔
+##岖
+##岗
+##岘
+##岙
+##岚
+##岛
+##岡
+##岩
+##岫
+##岬
+##岭
+##岱
+##岳
+##岷
+##岸
+##峇
+##峋
+##峒
+##峙
+##峡
+##峤
+##峥
+##峦
+##峨
+##峪
+##峭
+##峯
+##峰
+##峴
+##島
+##峻
+##峽
+##崁
+##崂
+##崆
+##崇
+##崎
+##崑
+##崔
+##崖
+##崗
+##崙
+##崛
+##崧
+##崩
+##崭
+##崴
+##崽
+##嵇
+##嵊
+##嵋
+##嵌
+##嵐
+##嵘
+##嵩
+##嵬
+##嵯
+##嶂
+##嶄
+##嶇
+##嶋
+##嶙
+##嶺
+##嶼
+##嶽
+##巅
+##巍
+##巒
+##巔
+##巖
+##川
+##州
+##巡
+##巢
+##工
+##左
+##巧
+##巨
+##巩
+##巫
+##差
+##己
+##已
+##巳
+##巴
+##巷
+##巻
+##巽
+##巾
+##巿
+##币
+##市
+##布
+##帅
+##帆
+##师
+##希
+##帐
+##帑
+##帕
+##帖
+##帘
+##帚
+##帛
+##帜
+##帝
+##帥
+##带
+##帧
+##師
+##席
+##帮
+##帯
+##帰
+##帳
+##帶
+##帷
+##常
+##帼
+##帽
+##幀
+##幂
+##幄
+##幅
+##幌
+##幔
+##幕
+##幟
+##幡
+##幢
+##幣
+##幫
+##干
+##平
+##年
+##并
+##幸
+##幹
+##幺
+##幻
+##幼
+##幽
+##幾
+##广
+##庁
+##広
+##庄
+##庆
+##庇
+##床
+##序
+##庐
+##库
+##应
+##底
+##庖
+##店
+##庙
+##庚
+##府
+##庞
+##废
+##庠
+##度
+##座
+##庫
+##庭
+##庵
+##庶
+##康
+##庸
+##庹
+##庾
+##廁
+##廂
+##廃
+##廈
+##廉
+##廊
+##廓
+##廖
+##廚
+##廝
+##廟
+##廠
+##廢
+##廣
+##廬
+##廳
+##延
+##廷
+##建
+##廿
+##开
+##弁
+##异
+##弃
+##弄
+##弈
+##弊
+##弋
+##式
+##弑
+##弒
+##弓
+##弔
+##引
+##弗
+##弘
+##弛
+##弟
+##张
+##弥
+##弦
+##弧
+##弩
+##弭
+##弯
+##弱
+##張
+##強
+##弹
+##强
+##弼
+##弾
+##彅
+##彆
+##彈
+##彌
+##彎
+##归
+##当
+##录
+##彗
+##彙
+##彝
+##形
+##彤
+##彥
+##彦
+##彧
+##彩
+##彪
+##彫
+##彬
+##彭
+##彰
+##影
+##彷
+##役
+##彻
+##彼
+##彿
+##往
+##征
+##径
+##待
+##徇
+##很
+##徉
+##徊
+##律
+##後
+##徐
+##徑
+##徒
+##従
+##徕
+##得
+##徘
+##徙
+##徜
+##從
+##徠
+##御
+##徨
+##復
+##循
+##徬
+##微
+##徳
+##徴
+##徵
+##德
+##徹
+##徼
+##徽
+##心
+##必
+##忆
+##忌
+##忍
+##忏
+##忐
+##忑
+##忒
+##忖
+##志
+##忘
+##忙
+##応
+##忠
+##忡
+##忤
+##忧
+##忪
+##快
+##忱
+##念
+##忻
+##忽
+##忿
+##怀
+##态
+##怂
+##怅
+##怆
+##怎
+##怏
+##怒
+##怔
+##怕
+##怖
+##怙
+##怜
+##思
+##怠
+##怡
+##急
+##怦
+##性
+##怨
+##怪
+##怯
+##怵
+##总
+##怼
+##恁
+##恃
+##恆
+##恋
+##恍
+##恐
+##恒
+##恕
+##恙
+##恚
+##恢
+##恣
+##恤
+##恥
+##恨
+##恩
+##恪
+##恫
+##恬
+##恭
+##息
+##恰
+##恳
+##恵
+##恶
+##恸
+##恺
+##恻
+##恼
+##恿
+##悄
+##悅
+##悉
+##悌
+##悍
+##悔
+##悖
+##悚
+##悟
+##悠
+##患
+##悦
+##您
+##悩
+##悪
+##悬
+##悯
+##悱
+##悲
+##悴
+##悵
+##悶
+##悸
+##悻
+##悼
+##悽
+##情
+##惆
+##惇
+##惊
+##惋
+##惑
+##惕
+##惘
+##惚
+##惜
+##惟
+##惠
+##惡
+##惦
+##惧
+##惨
+##惩
+##惫
+##惬
+##惭
+##惮
+##惯
+##惰
+##惱
+##想
+##惴
+##惶
+##惹
+##惺
+##愁
+##愆
+##愈
+##愉
+##愍
+##意
+##愕
+##愚
+##愛
+##愜
+##感
+##愣
+##愤
+##愧
+##愫
+##愷
+##愿
+##慄
+##慈
+##態
+##慌
+##慎
+##慑
+##慕
+##慘
+##慚
+##慟
+##慢
+##慣
+##慧
+##慨
+##慫
+##慮
+##慰
+##慳
+##慵
+##慶
+##慷
+##慾
+##憂
+##憊
+##憋
+##憎
+##憐
+##憑
+##憔
+##憚
+##憤
+##憧
+##憨
+##憩
+##憫
+##憬
+##憲
+##憶
+##憾
+##懂
+##懇
+##懈
+##應
+##懊
+##懋
+##懑
+##懒
+##懦
+##懲
+##懵
+##懶
+##懷
+##懸
+##懺
+##懼
+##懾
+##懿
+##戀
+##戈
+##戊
+##戌
+##戍
+##戎
+##戏
+##成
+##我
+##戒
+##戕
+##或
+##战
+##戚
+##戛
+##戟
+##戡
+##戦
+##截
+##戬
+##戮
+##戰
+##戲
+##戳
+##戴
+##戶
+##户
+##戸
+##戻
+##戾
+##房
+##所
+##扁
+##扇
+##扈
+##扉
+##手
+##才
+##扎
+##扑
+##扒
+##打
+##扔
+##払
+##托
+##扛
+##扣
+##扦
+##执
+##扩
+##扪
+##扫
+##扬
+##扭
+##扮
+##扯
+##扰
+##扱
+##扳
+##扶
+##批
+##扼
+##找
+##承
+##技
+##抄
+##抉
+##把
+##抑
+##抒
+##抓
+##投
+##抖
+##抗
+##折
+##抚
+##抛
+##抜
+##択
+##抟
+##抠
+##抡
+##抢
+##护
+##报
+##抨
+##披
+##抬
+##抱
+##抵
+##抹
+##押
+##抽
+##抿
+##拂
+##拄
+##担
+##拆
+##拇
+##拈
+##拉
+##拋
+##拌
+##拍
+##拎
+##拐
+##拒
+##拓
+##拔
+##拖
+##拗
+##拘
+##拙
+##拚
+##招
+##拜
+##拟
+##拡
+##拢
+##拣
+##拥
+##拦
+##拧
+##拨
+##择
+##括
+##拭
+##拮
+##拯
+##拱
+##拳
+##拴
+##拷
+##拼
+##拽
+##拾
+##拿
+##持
+##挂
+##指
+##挈
+##按
+##挎
+##挑
+##挖
+##挙
+##挚
+##挛
+##挝
+##挞
+##挟
+##挠
+##挡
+##挣
+##挤
+##挥
+##挨
+##挪
+##挫
+##振
+##挲
+##挹
+##挺
+##挽
+##挾
+##捂
+##捅
+##捆
+##捉
+##捋
+##捌
+##捍
+##捎
+##捏
+##捐
+##捕
+##捞
+##损
+##捡
+##换
+##捣
+##捧
+##捨
+##捩
+##据
+##捱
+##捲
+##捶
+##捷
+##捺
+##捻
+##掀
+##掂
+##掃
+##掇
+##授
+##掉
+##掌
+##掏
+##掐
+##排
+##掖
+##掘
+##掙
+##掛
+##掠
+##採
+##探
+##掣
+##接
+##控
+##推
+##掩
+##措
+##掬
+##掰
+##掲
+##掳
+##掴
+##掷
+##掸
+##掺
+##揀
+##揃
+##揄
+##揆
+##揉
+##揍
+##描
+##提
+##插
+##揖
+##揚
+##換
+##握
+##揣
+##揩
+##揪
+##揭
+##揮
+##援
+##揶
+##揸
+##揹
+##揽
+##搀
+##搁
+##搂
+##搅
+##損
+##搏
+##搐
+##搓
+##搔
+##搖
+##搗
+##搜
+##搞
+##搡
+##搪
+##搬
+##搭
+##搵
+##搶
+##携
+##搽
+##摀
+##摁
+##摄
+##摆
+##摇
+##摈
+##摊
+##摒
+##摔
+##摘
+##摞
+##摟
+##摧
+##摩
+##摯
+##摳
+##摸
+##摹
+##摺
+##摻
+##撂
+##撃
+##撅
+##撇
+##撈
+##撐
+##撑
+##撒
+##撓
+##撕
+##撚
+##撞
+##撤
+##撥
+##撩
+##撫
+##撬
+##播
+##撮
+##撰
+##撲
+##撵
+##撷
+##撸
+##撻
+##撼
+##撿
+##擀
+##擁
+##擂
+##擄
+##擅
+##擇
+##擊
+##擋
+##操
+##擎
+##擒
+##擔
+##擘
+##據
+##擞
+##擠
+##擡
+##擢
+##擦
+##擬
+##擰
+##擱
+##擲
+##擴
+##擷
+##擺
+##擼
+##擾
+##攀
+##攏
+##攒
+##攔
+##攘
+##攙
+##攜
+##攝
+##攞
+##攢
+##攣
+##攤
+##攥
+##攪
+##攫
+##攬
+##支
+##收
+##攸
+##改
+##攻
+##放
+##政
+##故
+##效
+##敌
+##敍
+##敎
+##敏
+##救
+##敕
+##敖
+##敗
+##敘
+##教
+##敛
+##敝
+##敞
+##敢
+##散
+##敦
+##敬
+##数
+##敲
+##整
+##敵
+##敷
+##數
+##斂
+##斃
+##文
+##斋
+##斌
+##斎
+##斐
+##斑
+##斓
+##斗
+##料
+##斛
+##斜
+##斟
+##斡
+##斤
+##斥
+##斧
+##斩
+##斫
+##斬
+##断
+##斯
+##新
+##斷
+##方
+##於
+##施
+##旁
+##旃
+##旅
+##旋
+##旌
+##旎
+##族
+##旖
+##旗
+##无
+##既
+##日
+##旦
+##旧
+##旨
+##早
+##旬
+##旭
+##旮
+##旱
+##时
+##旷
+##旺
+##旻
+##昀
+##昂
+##昆
+##昇
+##昉
+##昊
+##昌
+##明
+##昏
+##易
+##昔
+##昕
+##昙
+##星
+##映
+##春
+##昧
+##昨
+##昭
+##是
+##昱
+##昴
+##昵
+##昶
+##昼
+##显
+##晁
+##時
+##晃
+##晉
+##晋
+##晌
+##晏
+##晒
+##晓
+##晔
+##晕
+##晖
+##晗
+##晚
+##晝
+##晞
+##晟
+##晤
+##晦
+##晨
+##晩
+##普
+##景
+##晰
+##晴
+##晶
+##晷
+##智
+##晾
+##暂
+##暄
+##暇
+##暈
+##暉
+##暌
+##暐
+##暑
+##暖
+##暗
+##暝
+##暢
+##暧
+##暨
+##暫
+##暮
+##暱
+##暴
+##暸
+##暹
+##曄
+##曆
+##曇
+##曉
+##曖
+##曙
+##曜
+##曝
+##曠
+##曦
+##曬
+##曰
+##曲
+##曳
+##更
+##書
+##曹
+##曼
+##曾
+##替
+##最
+##會
+##月
+##有
+##朋
+##服
+##朐
+##朔
+##朕
+##朗
+##望
+##朝
+##期
+##朦
+##朧
+##木
+##未
+##末
+##本
+##札
+##朮
+##术
+##朱
+##朴
+##朵
+##机
+##朽
+##杀
+##杂
+##权
+##杆
+##杈
+##杉
+##李
+##杏
+##材
+##村
+##杓
+##杖
+##杜
+##杞
+##束
+##杠
+##条
+##来
+##杨
+##杭
+##杯
+##杰
+##東
+##杳
+##杵
+##杷
+##杼
+##松
+##板
+##极
+##构
+##枇
+##枉
+##枋
+##析
+##枕
+##林
+##枚
+##果
+##枝
+##枢
+##枣
+##枪
+##枫
+##枭
+##枯
+##枰
+##枱
+##枳
+##架
+##枷
+##枸
+##柄
+##柏
+##某
+##柑
+##柒
+##染
+##柔
+##柘
+##柚
+##柜
+##柞
+##柠
+##柢
+##查
+##柩
+##柬
+##柯
+##柱
+##柳
+##柴
+##柵
+##査
+##柿
+##栀
+##栃
+##栄
+##栅
+##标
+##栈
+##栉
+##栋
+##栎
+##栏
+##树
+##栓
+##栖
+##栗
+##校
+##栩
+##株
+##样
+##核
+##根
+##格
+##栽
+##栾
+##桀
+##桁
+##桂
+##桃
+##桅
+##框
+##案
+##桉
+##桌
+##桎
+##桐
+##桑
+##桓
+##桔
+##桜
+##桠
+##桡
+##桢
+##档
+##桥
+##桦
+##桧
+##桨
+##桩
+##桶
+##桿
+##梁
+##梅
+##梆
+##梏
+##梓
+##梗
+##條
+##梟
+##梢
+##梦
+##梧
+##梨
+##梭
+##梯
+##械
+##梳
+##梵
+##梶
+##检
+##棂
+##棄
+##棉
+##棋
+##棍
+##棒
+##棕
+##棗
+##棘
+##棚
+##棟
+##棠
+##棣
+##棧
+##森
+##棱
+##棲
+##棵
+##棹
+##棺
+##椁
+##椅
+##椋
+##植
+##椎
+##椒
+##検
+##椪
+##椭
+##椰
+##椹
+##椽
+##椿
+##楂
+##楊
+##楓
+##楔
+##楚
+##楝
+##楞
+##楠
+##楣
+##楨
+##楫
+##業
+##楮
+##極
+##楷
+##楸
+##楹
+##楼
+##楽
+##概
+##榄
+##榆
+##榈
+##榉
+##榔
+##榕
+##榖
+##榛
+##榜
+##榨
+##榫
+##榭
+##榮
+##榱
+##榴
+##榷
+##榻
+##槁
+##槃
+##構
+##槌
+##槍
+##槎
+##槐
+##槓
+##様
+##槛
+##槟
+##槤
+##槭
+##槲
+##槳
+##槻
+##槽
+##槿
+##樁
+##樂
+##樊
+##樑
+##樓
+##標
+##樞
+##樟
+##模
+##樣
+##権
+##横
+##樫
+##樯
+##樱
+##樵
+##樸
+##樹
+##樺
+##樽
+##樾
+##橄
+##橇
+##橋
+##橐
+##橘
+##橙
+##機
+##橡
+##橢
+##橫
+##橱
+##橹
+##橼
+##檀
+##檄
+##檎
+##檐
+##檔
+##檗
+##檜
+##檢
+##檬
+##檯
+##檳
+##檸
+##檻
+##櫃
+##櫚
+##櫛
+##櫥
+##櫸
+##櫻
+##欄
+##權
+##欒
+##欖
+##欠
+##次
+##欢
+##欣
+##欧
+##欲
+##欸
+##欺
+##欽
+##款
+##歆
+##歇
+##歉
+##歌
+##歎
+##歐
+##歓
+##歙
+##歛
+##歡
+##止
+##正
+##此
+##步
+##武
+##歧
+##歩
+##歪
+##歯
+##歲
+##歳
+##歴
+##歷
+##歸
+##歹
+##死
+##歼
+##殁
+##殃
+##殆
+##殇
+##殉
+##殊
+##残
+##殒
+##殓
+##殖
+##殘
+##殞
+##殡
+##殤
+##殭
+##殯
+##殲
+##殴
+##段
+##殷
+##殺
+##殼
+##殿
+##毀
+##毁
+##毂
+##毅
+##毆
+##毋
+##母
+##毎
+##每
+##毒
+##毓
+##比
+##毕
+##毗
+##毘
+##毙
+##毛
+##毡
+##毫
+##毯
+##毽
+##氈
+##氏
+##氐
+##民
+##氓
+##气
+##氖
+##気
+##氙
+##氛
+##氟
+##氡
+##氢
+##氣
+##氤
+##氦
+##氧
+##氨
+##氪
+##氫
+##氮
+##氯
+##氰
+##氲
+##水
+##氷
+##永
+##氹
+##氾
+##汀
+##汁
+##求
+##汆
+##汇
+##汉
+##汎
+##汐
+##汕
+##汗
+##汙
+##汛
+##汝
+##汞
+##江
+##池
+##污
+##汤
+##汨
+##汩
+##汪
+##汰
+##汲
+##汴
+##汶
+##汹
+##決
+##汽
+##汾
+##沁
+##沂
+##沃
+##沅
+##沈
+##沉
+##沌
+##沏
+##沐
+##沒
+##沓
+##沖
+##沙
+##沛
+##沟
+##没
+##沢
+##沣
+##沥
+##沦
+##沧
+##沪
+##沫
+##沭
+##沮
+##沱
+##河
+##沸
+##油
+##治
+##沼
+##沽
+##沾
+##沿
+##況
+##泄
+##泉
+##泊
+##泌
+##泓
+##法
+##泗
+##泛
+##泞
+##泠
+##泡
+##波
+##泣
+##泥
+##注
+##泪
+##泫
+##泮
+##泯
+##泰
+##泱
+##泳
+##泵
+##泷
+##泸
+##泻
+##泼
+##泽
+##泾
+##洁
+##洄
+##洋
+##洒
+##洗
+##洙
+##洛
+##洞
+##津
+##洩
+##洪
+##洮
+##洱
+##洲
+##洵
+##洶
+##洸
+##洹
+##活
+##洼
+##洽
+##派
+##流
+##浃
+##浄
+##浅
+##浆
+##浇
+##浊
+##测
+##济
+##浏
+##浑
+##浒
+##浓
+##浔
+##浙
+##浚
+##浜
+##浣
+##浦
+##浩
+##浪
+##浬
+##浮
+##浯
+##浴
+##海
+##浸
+##涂
+##涅
+##涇
+##消
+##涉
+##涌
+##涎
+##涓
+##涔
+##涕
+##涙
+##涛
+##涝
+##涞
+##涟
+##涠
+##涡
+##涣
+##涤
+##润
+##涧
+##涨
+##涩
+##涪
+##涮
+##涯
+##液
+##涵
+##涸
+##涼
+##涿
+##淀
+##淄
+##淅
+##淆
+##淇
+##淋
+##淌
+##淑
+##淒
+##淖
+##淘
+##淙
+##淚
+##淞
+##淡
+##淤
+##淦
+##淨
+##淩
+##淪
+##淫
+##淬
+##淮
+##深
+##淳
+##淵
+##混
+##淹
+##淺
+##添
+##淼
+##清
+##済
+##渉
+##渊
+##渋
+##渍
+##渎
+##渐
+##渔
+##渗
+##渙
+##渚
+##減
+##渝
+##渠
+##渡
+##渣
+##渤
+##渥
+##渦
+##温
+##測
+##渭
+##港
+##渲
+##渴
+##游
+##渺
+##渾
+##湃
+##湄
+##湊
+##湍
+##湖
+##湘
+##湛
+##湟
+##湧
+##湫
+##湮
+##湯
+##湳
+##湾
+##湿
+##満
+##溃
+##溅
+##溉
+##溏
+##源
+##準
+##溜
+##溝
+##溟
+##溢
+##溥
+##溧
+##溪
+##溫
+##溯
+##溱
+##溴
+##溶
+##溺
+##溼
+##滁
+##滂
+##滄
+##滅
+##滇
+##滋
+##滌
+##滑
+##滓
+##滔
+##滕
+##滙
+##滚
+##滝
+##滞
+##滟
+##满
+##滢
+##滤
+##滥
+##滦
+##滨
+##滩
+##滬
+##滯
+##滲
+##滴
+##滷
+##滸
+##滾
+##滿
+##漁
+##漂
+##漆
+##漉
+##漏
+##漓
+##演
+##漕
+##漠
+##漢
+##漣
+##漩
+##漪
+##漫
+##漬
+##漯
+##漱
+##漲
+##漳
+##漸
+##漾
+##漿
+##潆
+##潇
+##潋
+##潍
+##潑
+##潔
+##潘
+##潛
+##潜
+##潞
+##潟
+##潢
+##潤
+##潦
+##潧
+##潭
+##潮
+##潰
+##潴
+##潸
+##潺
+##潼
+##澀
+##澄
+##澆
+##澈
+##澍
+##澎
+##澗
+##澜
+##澡
+##澤
+##澧
+##澱
+##澳
+##澹
+##激
+##濁
+##濂
+##濃
+##濑
+##濒
+##濕
+##濘
+##濛
+##濟
+##濠
+##濡
+##濤
+##濫
+##濬
+##濮
+##濯
+##濱
+##濺
+##濾
+##瀅
+##瀆
+##瀉
+##瀋
+##瀏
+##瀑
+##瀕
+##瀘
+##瀚
+##瀛
+##瀝
+##瀞
+##瀟
+##瀧
+##瀨
+##瀬
+##瀰
+##瀾
+##灌
+##灏
+##灑
+##灘
+##灝
+##灞
+##灣
+##火
+##灬
+##灭
+##灯
+##灰
+##灵
+##灶
+##灸
+##灼
+##災
+##灾
+##灿
+##炀
+##炁
+##炅
+##炉
+##炊
+##炎
+##炒
+##炔
+##炕
+##炖
+##炙
+##炜
+##炫
+##炬
+##炭
+##炮
+##炯
+##炳
+##炷
+##炸
+##点
+##為
+##炼
+##炽
+##烁
+##烂
+##烃
+##烈
+##烊
+##烏
+##烘
+##烙
+##烛
+##烟
+##烤
+##烦
+##烧
+##烨
+##烩
+##烫
+##烬
+##热
+##烯
+##烷
+##烹
+##烽
+##焉
+##焊
+##焕
+##焖
+##焗
+##焘
+##焙
+##焚
+##焜
+##無
+##焦
+##焯
+##焰
+##焱
+##然
+##焼
+##煅
+##煉
+##煊
+##煌
+##煎
+##煒
+##煖
+##煙
+##煜
+##煞
+##煤
+##煥
+##煦
+##照
+##煨
+##煩
+##煮
+##煲
+##煸
+##煽
+##熄
+##熊
+##熏
+##熒
+##熔
+##熙
+##熟
+##熠
+##熨
+##熬
+##熱
+##熵
+##熹
+##熾
+##燁
+##燃
+##燄
+##燈
+##燉
+##燊
+##燎
+##燒
+##燔
+##燕
+##燙
+##燜
+##營
+##燥
+##燦
+##燧
+##燭
+##燮
+##燴
+##燻
+##燼
+##燿
+##爆
+##爍
+##爐
+##爛
+##爪
+##爬
+##爭
+##爰
+##爱
+##爲
+##爵
+##父
+##爷
+##爸
+##爹
+##爺
+##爻
+##爽
+##爾
+##牆
+##片
+##版
+##牌
+##牍
+##牒
+##牙
+##牛
+##牝
+##牟
+##牠
+##牡
+##牢
+##牦
+##牧
+##物
+##牯
+##牲
+##牴
+##牵
+##特
+##牺
+##牽
+##犀
+##犁
+##犄
+##犊
+##犍
+##犒
+##犢
+##犧
+##犬
+##犯
+##状
+##犷
+##犸
+##犹
+##狀
+##狂
+##狄
+##狈
+##狎
+##狐
+##狒
+##狗
+##狙
+##狞
+##狠
+##狡
+##狩
+##独
+##狭
+##狮
+##狰
+##狱
+##狸
+##狹
+##狼
+##狽
+##猎
+##猕
+##猖
+##猗
+##猙
+##猛
+##猜
+##猝
+##猥
+##猩
+##猪
+##猫
+##猬
+##献
+##猴
+##猶
+##猷
+##猾
+##猿
+##獄
+##獅
+##獎
+##獐
+##獒
+##獗
+##獠
+##獣
+##獨
+##獭
+##獰
+##獲
+##獵
+##獷
+##獸
+##獺
+##獻
+##獼
+##獾
+##玄
+##率
+##玉
+##王
+##玑
+##玖
+##玛
+##玟
+##玠
+##玥
+##玩
+##玫
+##玮
+##环
+##现
+##玲
+##玳
+##玷
+##玺
+##玻
+##珀
+##珂
+##珅
+##珈
+##珉
+##珊
+##珍
+##珏
+##珐
+##珑
+##珙
+##珞
+##珠
+##珣
+##珥
+##珩
+##珪
+##班
+##珮
+##珲
+##珺
+##現
+##球
+##琅
+##理
+##琇
+##琉
+##琊
+##琍
+##琏
+##琐
+##琛
+##琢
+##琥
+##琦
+##琨
+##琪
+##琬
+##琮
+##琰
+##琲
+##琳
+##琴
+##琵
+##琶
+##琺
+##琼
+##瑀
+##瑁
+##瑄
+##瑋
+##瑕
+##瑗
+##瑙
+##瑚
+##瑛
+##瑜
+##瑞
+##瑟
+##瑠
+##瑣
+##瑤
+##瑩
+##瑪
+##瑯
+##瑰
+##瑶
+##瑾
+##璀
+##璁
+##璃
+##璇
+##璉
+##璋
+##璎
+##璐
+##璜
+##璞
+##璟
+##璧
+##璨
+##環
+##璽
+##璿
+##瓊
+##瓏
+##瓒
+##瓜
+##瓢
+##瓣
+##瓤
+##瓦
+##瓮
+##瓯
+##瓴
+##瓶
+##瓷
+##甄
+##甌
+##甕
+##甘
+##甙
+##甚
+##甜
+##生
+##產
+##産
+##甥
+##甦
+##用
+##甩
+##甫
+##甬
+##甭
+##甯
+##田
+##由
+##甲
+##申
+##电
+##男
+##甸
+##町
+##画
+##甾
+##畀
+##畅
+##界
+##畏
+##畑
+##畔
+##留
+##畜
+##畝
+##畢
+##略
+##畦
+##番
+##畫
+##異
+##畲
+##畳
+##畴
+##當
+##畸
+##畹
+##畿
+##疆
+##疇
+##疊
+##疏
+##疑
+##疔
+##疖
+##疗
+##疙
+##疚
+##疝
+##疟
+##疡
+##疣
+##疤
+##疥
+##疫
+##疮
+##疯
+##疱
+##疲
+##疳
+##疵
+##疸
+##疹
+##疼
+##疽
+##疾
+##痂
+##病
+##症
+##痈
+##痉
+##痊
+##痍
+##痒
+##痔
+##痕
+##痘
+##痙
+##痛
+##痞
+##痠
+##痢
+##痣
+##痤
+##痧
+##痨
+##痪
+##痫
+##痰
+##痱
+##痴
+##痹
+##痺
+##痼
+##痿
+##瘀
+##瘁
+##瘋
+##瘍
+##瘓
+##瘘
+##瘙
+##瘟
+##瘠
+##瘡
+##瘢
+##瘤
+##瘦
+##瘧
+##瘩
+##瘪
+##瘫
+##瘴
+##瘸
+##瘾
+##療
+##癇
+##癌
+##癒
+##癖
+##癜
+##癞
+##癡
+##癢
+##癣
+##癥
+##癫
+##癬
+##癮
+##癱
+##癲
+##癸
+##発
+##登
+##發
+##白
+##百
+##皂
+##的
+##皆
+##皇
+##皈
+##皋
+##皎
+##皑
+##皓
+##皖
+##皙
+##皚
+##皮
+##皰
+##皱
+##皴
+##皺
+##皿
+##盂
+##盃
+##盅
+##盆
+##盈
+##益
+##盎
+##盏
+##盐
+##监
+##盒
+##盔
+##盖
+##盗
+##盘
+##盛
+##盜
+##盞
+##盟
+##盡
+##監
+##盤
+##盥
+##盧
+##盪
+##目
+##盯
+##盱
+##盲
+##直
+##相
+##盹
+##盼
+##盾
+##省
+##眈
+##眉
+##看
+##県
+##眙
+##眞
+##真
+##眠
+##眦
+##眨
+##眩
+##眯
+##眶
+##眷
+##眸
+##眺
+##眼
+##眾
+##着
+##睁
+##睇
+##睏
+##睐
+##睑
+##睛
+##睜
+##睞
+##睡
+##睢
+##督
+##睥
+##睦
+##睨
+##睪
+##睫
+##睬
+##睹
+##睽
+##睾
+##睿
+##瞄
+##瞅
+##瞇
+##瞋
+##瞌
+##瞎
+##瞑
+##瞒
+##瞓
+##瞞
+##瞟
+##瞠
+##瞥
+##瞧
+##瞩
+##瞪
+##瞬
+##瞭
+##瞰
+##瞳
+##瞻
+##瞼
+##瞿
+##矇
+##矍
+##矗
+##矚
+##矛
+##矜
+##矢
+##矣
+##知
+##矩
+##矫
+##短
+##矮
+##矯
+##石
+##矶
+##矽
+##矾
+##矿
+##码
+##砂
+##砌
+##砍
+##砒
+##研
+##砖
+##砗
+##砚
+##砝
+##砣
+##砥
+##砧
+##砭
+##砰
+##砲
+##破
+##砷
+##砸
+##砺
+##砼
+##砾
+##础
+##硅
+##硐
+##硒
+##硕
+##硝
+##硫
+##硬
+##确
+##硯
+##硼
+##碁
+##碇
+##碉
+##碌
+##碍
+##碎
+##碑
+##碓
+##碗
+##碘
+##碚
+##碛
+##碟
+##碣
+##碧
+##碩
+##碰
+##碱
+##碳
+##碴
+##確
+##碼
+##碾
+##磁
+##磅
+##磊
+##磋
+##磐
+##磕
+##磚
+##磡
+##磨
+##磬
+##磯
+##磲
+##磷
+##磺
+##礁
+##礎
+##礙
+##礡
+##礦
+##礪
+##礫
+##礴
+##示
+##礼
+##社
+##祀
+##祁
+##祂
+##祇
+##祈
+##祉
+##祎
+##祐
+##祕
+##祖
+##祗
+##祚
+##祛
+##祜
+##祝
+##神
+##祟
+##祠
+##祢
+##祥
+##票
+##祭
+##祯
+##祷
+##祸
+##祺
+##祿
+##禀
+##禁
+##禄
+##禅
+##禍
+##禎
+##福
+##禛
+##禦
+##禧
+##禪
+##禮
+##禱
+##禹
+##禺
+##离
+##禽
+##禾
+##禿
+##秀
+##私
+##秃
+##秆
+##秉
+##秋
+##种
+##科
+##秒
+##秘
+##租
+##秣
+##秤
+##秦
+##秧
+##秩
+##秭
+##积
+##称
+##秸
+##移
+##秽
+##稀
+##稅
+##程
+##稍
+##税
+##稔
+##稗
+##稚
+##稜
+##稞
+##稟
+##稠
+##稣
+##種
+##稱
+##稲
+##稳
+##稷
+##稹
+##稻
+##稼
+##稽
+##稿
+##穀
+##穂
+##穆
+##穌
+##積
+##穎
+##穗
+##穢
+##穩
+##穫
+##穴
+##究
+##穷
+##穹
+##空
+##穿
+##突
+##窃
+##窄
+##窈
+##窍
+##窑
+##窒
+##窓
+##窕
+##窖
+##窗
+##窘
+##窜
+##窝
+##窟
+##窠
+##窥
+##窦
+##窨
+##窩
+##窪
+##窮
+##窯
+##窺
+##窿
+##竄
+##竅
+##竇
+##竊
+##立
+##竖
+##站
+##竜
+##竞
+##竟
+##章
+##竣
+##童
+##竭
+##端
+##競
+##竹
+##竺
+##竽
+##竿
+##笃
+##笆
+##笈
+##笋
+##笏
+##笑
+##笔
+##笙
+##笛
+##笞
+##笠
+##符
+##笨
+##第
+##笹
+##笺
+##笼
+##筆
+##等
+##筊
+##筋
+##筍
+##筏
+##筐
+##筑
+##筒
+##答
+##策
+##筛
+##筝
+##筠
+##筱
+##筲
+##筵
+##筷
+##筹
+##签
+##简
+##箇
+##箋
+##箍
+##箏
+##箐
+##箔
+##箕
+##算
+##箝
+##管
+##箩
+##箫
+##箭
+##箱
+##箴
+##箸
+##節
+##篁
+##範
+##篆
+##篇
+##築
+##篑
+##篓
+##篙
+##篝
+##篠
+##篡
+##篤
+##篩
+##篪
+##篮
+##篱
+##篷
+##簇
+##簌
+##簍
+##簡
+##簦
+##簧
+##簪
+##簫
+##簷
+##簸
+##簽
+##簾
+##簿
+##籁
+##籃
+##籌
+##籍
+##籐
+##籟
+##籠
+##籤
+##籬
+##籮
+##籲
+##米
+##类
+##籼
+##籽
+##粄
+##粉
+##粑
+##粒
+##粕
+##粗
+##粘
+##粟
+##粤
+##粥
+##粧
+##粪
+##粮
+##粱
+##粲
+##粳
+##粵
+##粹
+##粼
+##粽
+##精
+##粿
+##糅
+##糊
+##糍
+##糕
+##糖
+##糗
+##糙
+##糜
+##糞
+##糟
+##糠
+##糧
+##糬
+##糯
+##糰
+##糸
+##系
+##糾
+##紀
+##紂
+##約
+##紅
+##紉
+##紊
+##紋
+##納
+##紐
+##紓
+##純
+##紗
+##紘
+##紙
+##級
+##紛
+##紜
+##素
+##紡
+##索
+##紧
+##紫
+##紮
+##累
+##細
+##紳
+##紹
+##紺
+##終
+##絃
+##組
+##絆
+##経
+##結
+##絕
+##絞
+##絡
+##絢
+##給
+##絨
+##絮
+##統
+##絲
+##絳
+##絵
+##絶
+##絹
+##綁
+##綏
+##綑
+##經
+##継
+##続
+##綜
+##綠
+##綢
+##綦
+##綫
+##綬
+##維
+##綱
+##網
+##綴
+##綵
+##綸
+##綺
+##綻
+##綽
+##綾
+##綿
+##緊
+##緋
+##総
+##緑
+##緒
+##緘
+##線
+##緝
+##緞
+##締
+##緣
+##編
+##緩
+##緬
+##緯
+##練
+##緹
+##緻
+##縁
+##縄
+##縈
+##縛
+##縝
+##縣
+##縫
+##縮
+##縱
+##縴
+##縷
+##總
+##績
+##繁
+##繃
+##繆
+##繇
+##繋
+##織
+##繕
+##繚
+##繞
+##繡
+##繩
+##繪
+##繫
+##繭
+##繳
+##繹
+##繼
+##繽
+##纂
+##續
+##纍
+##纏
+##纓
+##纔
+##纖
+##纜
+##纠
+##红
+##纣
+##纤
+##约
+##级
+##纨
+##纪
+##纫
+##纬
+##纭
+##纯
+##纰
+##纱
+##纲
+##纳
+##纵
+##纶
+##纷
+##纸
+##纹
+##纺
+##纽
+##纾
+##线
+##绀
+##练
+##组
+##绅
+##细
+##织
+##终
+##绊
+##绍
+##绎
+##经
+##绑
+##绒
+##结
+##绔
+##绕
+##绘
+##给
+##绚
+##绛
+##络
+##绝
+##绞
+##统
+##绡
+##绢
+##绣
+##绥
+##绦
+##继
+##绩
+##绪
+##绫
+##续
+##绮
+##绯
+##绰
+##绳
+##维
+##绵
+##绶
+##绷
+##绸
+##绻
+##综
+##绽
+##绾
+##绿
+##缀
+##缄
+##缅
+##缆
+##缇
+##缈
+##缉
+##缎
+##缓
+##缔
+##缕
+##编
+##缘
+##缙
+##缚
+##缜
+##缝
+##缠
+##缢
+##缤
+##缥
+##缨
+##缩
+##缪
+##缭
+##缮
+##缰
+##缱
+##缴
+##缸
+##缺
+##缽
+##罂
+##罄
+##罌
+##罐
+##网
+##罔
+##罕
+##罗
+##罚
+##罡
+##罢
+##罩
+##罪
+##置
+##罰
+##署
+##罵
+##罷
+##罹
+##羁
+##羅
+##羈
+##羊
+##羌
+##美
+##羔
+##羚
+##羞
+##羟
+##羡
+##羣
+##群
+##羥
+##羧
+##羨
+##義
+##羯
+##羲
+##羸
+##羹
+##羽
+##羿
+##翁
+##翅
+##翊
+##翌
+##翎
+##習
+##翔
+##翘
+##翟
+##翠
+##翡
+##翦
+##翩
+##翰
+##翱
+##翳
+##翹
+##翻
+##翼
+##耀
+##老
+##考
+##耄
+##者
+##耆
+##耋
+##而
+##耍
+##耐
+##耒
+##耕
+##耗
+##耘
+##耙
+##耦
+##耨
+##耳
+##耶
+##耷
+##耸
+##耻
+##耽
+##耿
+##聂
+##聆
+##聊
+##聋
+##职
+##聒
+##联
+##聖
+##聘
+##聚
+##聞
+##聪
+##聯
+##聰
+##聲
+##聳
+##聴
+##聶
+##職
+##聽
+##聾
+##聿
+##肃
+##肄
+##肅
+##肆
+##肇
+##肉
+##肋
+##肌
+##肏
+##肓
+##肖
+##肘
+##肚
+##肛
+##肝
+##肠
+##股
+##肢
+##肤
+##肥
+##肩
+##肪
+##肮
+##肯
+##肱
+##育
+##肴
+##肺
+##肽
+##肾
+##肿
+##胀
+##胁
+##胃
+##胄
+##胆
+##背
+##胍
+##胎
+##胖
+##胚
+##胛
+##胜
+##胝
+##胞
+##胡
+##胤
+##胥
+##胧
+##胫
+##胭
+##胯
+##胰
+##胱
+##胳
+##胴
+##胶
+##胸
+##胺
+##能
+##脂
+##脅
+##脆
+##脇
+##脈
+##脉
+##脊
+##脍
+##脏
+##脐
+##脑
+##脓
+##脖
+##脘
+##脚
+##脛
+##脣
+##脩
+##脫
+##脯
+##脱
+##脲
+##脳
+##脸
+##脹
+##脾
+##腆
+##腈
+##腊
+##腋
+##腌
+##腎
+##腐
+##腑
+##腓
+##腔
+##腕
+##腥
+##腦
+##腩
+##腫
+##腭
+##腮
+##腰
+##腱
+##腳
+##腴
+##腸
+##腹
+##腺
+##腻
+##腼
+##腾
+##腿
+##膀
+##膈
+##膊
+##膏
+##膑
+##膘
+##膚
+##膛
+##膜
+##膝
+##膠
+##膦
+##膨
+##膩
+##膳
+##膺
+##膻
+##膽
+##膾
+##膿
+##臀
+##臂
+##臃
+##臆
+##臉
+##臊
+##臍
+##臓
+##臘
+##臟
+##臣
+##臥
+##臧
+##臨
+##自
+##臬
+##臭
+##至
+##致
+##臺
+##臻
+##臼
+##臾
+##舀
+##舂
+##舅
+##舆
+##與
+##興
+##舉
+##舊
+##舌
+##舍
+##舎
+##舐
+##舒
+##舔
+##舖
+##舗
+##舛
+##舜
+##舞
+##舟
+##航
+##舫
+##般
+##舰
+##舱
+##舵
+##舶
+##舷
+##舸
+##船
+##舺
+##舾
+##艇
+##艋
+##艘
+##艙
+##艦
+##艮
+##良
+##艰
+##艱
+##色
+##艳
+##艷
+##艹
+##艺
+##艾
+##节
+##芃
+##芈
+##芊
+##芋
+##芍
+##芎
+##芒
+##芙
+##芜
+##芝
+##芡
+##芥
+##芦
+##芩
+##芪
+##芫
+##芬
+##芭
+##芮
+##芯
+##花
+##芳
+##芷
+##芸
+##芹
+##芻
+##芽
+##芾
+##苁
+##苄
+##苇
+##苋
+##苍
+##苏
+##苑
+##苒
+##苓
+##苔
+##苕
+##苗
+##苛
+##苜
+##苞
+##苟
+##苡
+##苣
+##若
+##苦
+##苫
+##苯
+##英
+##苷
+##苹
+##苻
+##茁
+##茂
+##范
+##茄
+##茅
+##茉
+##茎
+##茏
+##茗
+##茜
+##茧
+##茨
+##茫
+##茬
+##茭
+##茯
+##茱
+##茲
+##茴
+##茵
+##茶
+##茸
+##茹
+##茼
+##荀
+##荃
+##荆
+##草
+##荊
+##荏
+##荐
+##荒
+##荔
+##荖
+##荘
+##荚
+##荞
+##荟
+##荠
+##荡
+##荣
+##荤
+##荥
+##荧
+##荨
+##荪
+##荫
+##药
+##荳
+##荷
+##荸
+##荻
+##荼
+##荽
+##莅
+##莆
+##莉
+##莊
+##莎
+##莒
+##莓
+##莖
+##莘
+##莞
+##莠
+##莢
+##莧
+##莪
+##莫
+##莱
+##莲
+##莴
+##获
+##莹
+##莺
+##莽
+##莿
+##菀
+##菁
+##菅
+##菇
+##菈
+##菊
+##菌
+##菏
+##菓
+##菖
+##菘
+##菜
+##菟
+##菠
+##菡
+##菩
+##華
+##菱
+##菲
+##菸
+##菽
+##萁
+##萃
+##萄
+##萊
+##萋
+##萌
+##萍
+##萎
+##萘
+##萝
+##萤
+##营
+##萦
+##萧
+##萨
+##萩
+##萬
+##萱
+##萵
+##萸
+##萼
+##落
+##葆
+##葉
+##著
+##葚
+##葛
+##葡
+##董
+##葦
+##葩
+##葫
+##葬
+##葭
+##葯
+##葱
+##葳
+##葵
+##葷
+##葺
+##蒂
+##蒋
+##蒐
+##蒔
+##蒙
+##蒜
+##蒞
+##蒟
+##蒡
+##蒨
+##蒲
+##蒸
+##蒹
+##蒻
+##蒼
+##蒿
+##蓁
+##蓄
+##蓆
+##蓉
+##蓋
+##蓑
+##蓓
+##蓖
+##蓝
+##蓟
+##蓦
+##蓬
+##蓮
+##蓼
+##蓿
+##蔑
+##蔓
+##蔔
+##蔗
+##蔘
+##蔚
+##蔡
+##蔣
+##蔥
+##蔫
+##蔬
+##蔭
+##蔵
+##蔷
+##蔺
+##蔻
+##蔼
+##蔽
+##蕁
+##蕃
+##蕈
+##蕉
+##蕊
+##蕎
+##蕙
+##蕤
+##蕨
+##蕩
+##蕪
+##蕭
+##蕲
+##蕴
+##蕻
+##蕾
+##薄
+##薅
+##薇
+##薈
+##薊
+##薏
+##薑
+##薔
+##薙
+##薛
+##薦
+##薨
+##薩
+##薪
+##薬
+##薯
+##薰
+##薹
+##藉
+##藍
+##藏
+##藐
+##藓
+##藕
+##藜
+##藝
+##藤
+##藥
+##藩
+##藹
+##藻
+##藿
+##蘆
+##蘇
+##蘊
+##蘋
+##蘑
+##蘚
+##蘭
+##蘸
+##蘼
+##蘿
+##虎
+##虏
+##虐
+##虑
+##虔
+##處
+##虚
+##虛
+##虜
+##虞
+##號
+##虢
+##虧
+##虫
+##虬
+##虱
+##虹
+##虻
+##虽
+##虾
+##蚀
+##蚁
+##蚂
+##蚊
+##蚌
+##蚓
+##蚕
+##蚜
+##蚝
+##蚣
+##蚤
+##蚩
+##蚪
+##蚯
+##蚱
+##蚵
+##蛀
+##蛆
+##蛇
+##蛊
+##蛋
+##蛎
+##蛐
+##蛔
+##蛙
+##蛛
+##蛟
+##蛤
+##蛭
+##蛮
+##蛰
+##蛳
+##蛹
+##蛻
+##蛾
+##蜀
+##蜂
+##蜃
+##蜆
+##蜇
+##蜈
+##蜊
+##蜍
+##蜒
+##蜓
+##蜕
+##蜗
+##蜘
+##蜚
+##蜜
+##蜡
+##蜢
+##蜥
+##蜱
+##蜴
+##蜷
+##蜻
+##蜿
+##蝇
+##蝈
+##蝉
+##蝌
+##蝎
+##蝕
+##蝗
+##蝙
+##蝟
+##蝠
+##蝦
+##蝨
+##蝴
+##蝶
+##蝸
+##蝼
+##螂
+##螃
+##融
+##螞
+##螢
+##螨
+##螯
+##螳
+##螺
+##蟀
+##蟄
+##蟆
+##蟋
+##蟎
+##蟑
+##蟒
+##蟠
+##蟬
+##蟲
+##蟹
+##蟻
+##蟾
+##蠅
+##蠍
+##蠔
+##蠕
+##蠛
+##蠟
+##蠡
+##蠢
+##蠣
+##蠱
+##蠶
+##蠹
+##蠻
+##血
+##衄
+##衅
+##衆
+##行
+##衍
+##術
+##衔
+##街
+##衙
+##衛
+##衝
+##衞
+##衡
+##衢
+##衣
+##补
+##表
+##衩
+##衫
+##衬
+##衮
+##衰
+##衲
+##衷
+##衹
+##衾
+##衿
+##袁
+##袂
+##袄
+##袅
+##袈
+##袋
+##袍
+##袒
+##袖
+##袜
+##袞
+##袤
+##袪
+##被
+##袭
+##袱
+##裁
+##裂
+##装
+##裆
+##裊
+##裏
+##裔
+##裕
+##裘
+##裙
+##補
+##裝
+##裟
+##裡
+##裤
+##裨
+##裱
+##裳
+##裴
+##裸
+##裹
+##製
+##裾
+##褂
+##複
+##褐
+##褒
+##褓
+##褔
+##褚
+##褥
+##褪
+##褫
+##褲
+##褶
+##褻
+##襁
+##襄
+##襟
+##襠
+##襪
+##襬
+##襯
+##襲
+##西
+##要
+##覃
+##覆
+##覇
+##見
+##規
+##覓
+##視
+##覚
+##覦
+##覧
+##親
+##覬
+##観
+##覷
+##覺
+##覽
+##觀
+##见
+##观
+##规
+##觅
+##视
+##览
+##觉
+##觊
+##觎
+##觐
+##觑
+##角
+##觞
+##解
+##觥
+##触
+##觸
+##言
+##訂
+##計
+##訊
+##討
+##訓
+##訕
+##訖
+##託
+##記
+##訛
+##訝
+##訟
+##訣
+##訥
+##訪
+##設
+##許
+##訳
+##訴
+##訶
+##診
+##註
+##証
+##詆
+##詐
+##詔
+##評
+##詛
+##詞
+##詠
+##詡
+##詢
+##詣
+##試
+##詩
+##詫
+##詬
+##詭
+##詮
+##詰
+##話
+##該
+##詳
+##詹
+##詼
+##誅
+##誇
+##誉
+##誌
+##認
+##誓
+##誕
+##誘
+##語
+##誠
+##誡
+##誣
+##誤
+##誥
+##誦
+##誨
+##說
+##説
+##読
+##誰
+##課
+##誹
+##誼
+##調
+##諄
+##談
+##請
+##諏
+##諒
+##論
+##諗
+##諜
+##諡
+##諦
+##諧
+##諫
+##諭
+##諮
+##諱
+##諳
+##諷
+##諸
+##諺
+##諾
+##謀
+##謁
+##謂
+##謄
+##謊
+##謎
+##謐
+##謔
+##謗
+##謙
+##講
+##謝
+##謠
+##謨
+##謬
+##謹
+##謾
+##譁
+##證
+##譎
+##譏
+##識
+##譙
+##譚
+##譜
+##警
+##譬
+##譯
+##議
+##譲
+##譴
+##護
+##譽
+##讀
+##變
+##讓
+##讚
+##讞
+##计
+##订
+##认
+##讥
+##讧
+##讨
+##让
+##讪
+##讫
+##训
+##议
+##讯
+##记
+##讲
+##讳
+##讴
+##讶
+##讷
+##许
+##讹
+##论
+##讼
+##讽
+##设
+##访
+##诀
+##证
+##诃
+##评
+##诅
+##识
+##诈
+##诉
+##诊
+##诋
+##词
+##诏
+##译
+##试
+##诗
+##诘
+##诙
+##诚
+##诛
+##话
+##诞
+##诟
+##诠
+##诡
+##询
+##诣
+##诤
+##该
+##详
+##诧
+##诩
+##诫
+##诬
+##语
+##误
+##诰
+##诱
+##诲
+##说
+##诵
+##诶
+##请
+##诸
+##诺
+##读
+##诽
+##课
+##诿
+##谀
+##谁
+##调
+##谄
+##谅
+##谆
+##谈
+##谊
+##谋
+##谌
+##谍
+##谎
+##谏
+##谐
+##谑
+##谒
+##谓
+##谔
+##谕
+##谗
+##谘
+##谙
+##谚
+##谛
+##谜
+##谟
+##谢
+##谣
+##谤
+##谥
+##谦
+##谧
+##谨
+##谩
+##谪
+##谬
+##谭
+##谯
+##谱
+##谲
+##谴
+##谶
+##谷
+##豁
+##豆
+##豇
+##豈
+##豉
+##豊
+##豌
+##豎
+##豐
+##豔
+##豚
+##象
+##豢
+##豪
+##豫
+##豬
+##豹
+##豺
+##貂
+##貅
+##貌
+##貓
+##貔
+##貘
+##貝
+##貞
+##負
+##財
+##貢
+##貧
+##貨
+##販
+##貪
+##貫
+##責
+##貯
+##貰
+##貳
+##貴
+##貶
+##買
+##貸
+##費
+##貼
+##貽
+##貿
+##賀
+##賁
+##賂
+##賃
+##賄
+##資
+##賈
+##賊
+##賑
+##賓
+##賜
+##賞
+##賠
+##賡
+##賢
+##賣
+##賤
+##賦
+##質
+##賬
+##賭
+##賴
+##賺
+##購
+##賽
+##贅
+##贈
+##贊
+##贍
+##贏
+##贓
+##贖
+##贛
+##贝
+##贞
+##负
+##贡
+##财
+##责
+##贤
+##败
+##账
+##货
+##质
+##贩
+##贪
+##贫
+##贬
+##购
+##贮
+##贯
+##贰
+##贱
+##贲
+##贴
+##贵
+##贷
+##贸
+##费
+##贺
+##贻
+##贼
+##贾
+##贿
+##赁
+##赂
+##赃
+##资
+##赅
+##赈
+##赊
+##赋
+##赌
+##赎
+##赏
+##赐
+##赓
+##赔
+##赖
+##赘
+##赚
+##赛
+##赝
+##赞
+##赠
+##赡
+##赢
+##赣
+##赤
+##赦
+##赧
+##赫
+##赭
+##走
+##赳
+##赴
+##赵
+##赶
+##起
+##趁
+##超
+##越
+##趋
+##趕
+##趙
+##趟
+##趣
+##趨
+##足
+##趴
+##趵
+##趸
+##趺
+##趾
+##跃
+##跄
+##跆
+##跋
+##跌
+##跎
+##跑
+##跖
+##跚
+##跛
+##距
+##跟
+##跡
+##跤
+##跨
+##跩
+##跪
+##路
+##跳
+##践
+##跷
+##跹
+##跺
+##跻
+##踉
+##踊
+##踌
+##踏
+##踐
+##踝
+##踞
+##踟
+##踢
+##踩
+##踪
+##踮
+##踱
+##踴
+##踵
+##踹
+##蹂
+##蹄
+##蹇
+##蹈
+##蹉
+##蹊
+##蹋
+##蹑
+##蹒
+##蹙
+##蹟
+##蹣
+##蹤
+##蹦
+##蹩
+##蹬
+##蹭
+##蹲
+##蹴
+##蹶
+##蹺
+##蹼
+##蹿
+##躁
+##躇
+##躉
+##躊
+##躋
+##躍
+##躏
+##躪
+##身
+##躬
+##躯
+##躲
+##躺
+##軀
+##車
+##軋
+##軌
+##軍
+##軒
+##軟
+##転
+##軸
+##軼
+##軽
+##軾
+##較
+##載
+##輒
+##輓
+##輔
+##輕
+##輛
+##輝
+##輟
+##輩
+##輪
+##輯
+##輸
+##輻
+##輾
+##輿
+##轄
+##轅
+##轆
+##轉
+##轍
+##轎
+##轟
+##车
+##轧
+##轨
+##轩
+##转
+##轭
+##轮
+##软
+##轰
+##轲
+##轴
+##轶
+##轻
+##轼
+##载
+##轿
+##较
+##辄
+##辅
+##辆
+##辇
+##辈
+##辉
+##辊
+##辍
+##辐
+##辑
+##输
+##辕
+##辖
+##辗
+##辘
+##辙
+##辛
+##辜
+##辞
+##辟
+##辣
+##辦
+##辨
+##辩
+##辫
+##辭
+##辮
+##辯
+##辰
+##辱
+##農
+##边
+##辺
+##辻
+##込
+##辽
+##达
+##迁
+##迂
+##迄
+##迅
+##过
+##迈
+##迎
+##运
+##近
+##返
+##还
+##这
+##进
+##远
+##违
+##连
+##迟
+##迢
+##迤
+##迥
+##迦
+##迩
+##迪
+##迫
+##迭
+##述
+##迴
+##迷
+##迸
+##迹
+##迺
+##追
+##退
+##送
+##适
+##逃
+##逅
+##逆
+##选
+##逊
+##逍
+##透
+##逐
+##递
+##途
+##逕
+##逗
+##這
+##通
+##逛
+##逝
+##逞
+##速
+##造
+##逢
+##連
+##逮
+##週
+##進
+##逵
+##逶
+##逸
+##逻
+##逼
+##逾
+##遁
+##遂
+##遅
+##遇
+##遊
+##運
+##遍
+##過
+##遏
+##遐
+##遑
+##遒
+##道
+##達
+##違
+##遗
+##遙
+##遛
+##遜
+##遞
+##遠
+##遢
+##遣
+##遥
+##遨
+##適
+##遭
+##遮
+##遲
+##遴
+##遵
+##遶
+##遷
+##選
+##遺
+##遼
+##遽
+##避
+##邀
+##邁
+##邂
+##邃
+##還
+##邇
+##邈
+##邊
+##邋
+##邏
+##邑
+##邓
+##邕
+##邛
+##邝
+##邢
+##那
+##邦
+##邨
+##邪
+##邬
+##邮
+##邯
+##邰
+##邱
+##邳
+##邵
+##邸
+##邹
+##邺
+##邻
+##郁
+##郅
+##郊
+##郎
+##郑
+##郜
+##郝
+##郡
+##郢
+##郤
+##郦
+##郧
+##部
+##郫
+##郭
+##郴
+##郵
+##郷
+##郸
+##都
+##鄂
+##鄉
+##鄒
+##鄔
+##鄙
+##鄞
+##鄢
+##鄧
+##鄭
+##鄰
+##鄱
+##鄲
+##鄺
+##酉
+##酊
+##酋
+##酌
+##配
+##酐
+##酒
+##酗
+##酚
+##酝
+##酢
+##酣
+##酥
+##酩
+##酪
+##酬
+##酮
+##酯
+##酰
+##酱
+##酵
+##酶
+##酷
+##酸
+##酿
+##醃
+##醇
+##醉
+##醋
+##醍
+##醐
+##醒
+##醚
+##醛
+##醜
+##醞
+##醣
+##醪
+##醫
+##醬
+##醮
+##醯
+##醴
+##醺
+##釀
+##釁
+##采
+##釉
+##释
+##釋
+##里
+##重
+##野
+##量
+##釐
+##金
+##釗
+##釘
+##釜
+##針
+##釣
+##釦
+##釧
+##釵
+##鈀
+##鈉
+##鈍
+##鈎
+##鈔
+##鈕
+##鈞
+##鈣
+##鈦
+##鈪
+##鈴
+##鈺
+##鈾
+##鉀
+##鉄
+##鉅
+##鉉
+##鉑
+##鉗
+##鉚
+##鉛
+##鉤
+##鉴
+##鉻
+##銀
+##銃
+##銅
+##銑
+##銓
+##銖
+##銘
+##銜
+##銬
+##銭
+##銮
+##銳
+##銷
+##銹
+##鋁
+##鋅
+##鋒
+##鋤
+##鋪
+##鋰
+##鋸
+##鋼
+##錄
+##錐
+##錘
+##錚
+##錠
+##錢
+##錦
+##錨
+##錫
+##錮
+##錯
+##録
+##錳
+##錶
+##鍊
+##鍋
+##鍍
+##鍛
+##鍥
+##鍰
+##鍵
+##鍺
+##鍾
+##鎂
+##鎊
+##鎌
+##鎏
+##鎔
+##鎖
+##鎗
+##鎚
+##鎧
+##鎬
+##鎮
+##鎳
+##鏈
+##鏖
+##鏗
+##鏘
+##鏞
+##鏟
+##鏡
+##鏢
+##鏤
+##鏽
+##鐘
+##鐮
+##鐲
+##鐳
+##鐵
+##鐸
+##鐺
+##鑄
+##鑊
+##鑑
+##鑒
+##鑣
+##鑫
+##鑰
+##鑲
+##鑼
+##鑽
+##鑾
+##鑿
+##针
+##钉
+##钊
+##钎
+##钏
+##钒
+##钓
+##钗
+##钙
+##钛
+##钜
+##钝
+##钞
+##钟
+##钠
+##钡
+##钢
+##钣
+##钤
+##钥
+##钦
+##钧
+##钨
+##钩
+##钮
+##钯
+##钰
+##钱
+##钳
+##钴
+##钵
+##钺
+##钻
+##钼
+##钾
+##钿
+##铀
+##铁
+##铂
+##铃
+##铄
+##铅
+##铆
+##铉
+##铎
+##铐
+##铛
+##铜
+##铝
+##铠
+##铡
+##铢
+##铣
+##铤
+##铨
+##铩
+##铬
+##铭
+##铮
+##铰
+##铲
+##铵
+##银
+##铸
+##铺
+##链
+##铿
+##销
+##锁
+##锂
+##锄
+##锅
+##锆
+##锈
+##锉
+##锋
+##锌
+##锏
+##锐
+##锑
+##错
+##锚
+##锟
+##锡
+##锢
+##锣
+##锤
+##锥
+##锦
+##锭
+##键
+##锯
+##锰
+##锲
+##锵
+##锹
+##锺
+##锻
+##镀
+##镁
+##镂
+##镇
+##镉
+##镌
+##镍
+##镐
+##镑
+##镕
+##镖
+##镗
+##镛
+##镜
+##镣
+##镭
+##镯
+##镰
+##镳
+##镶
+##長
+##长
+##門
+##閃
+##閉
+##開
+##閎
+##閏
+##閑
+##閒
+##間
+##閔
+##閘
+##閡
+##関
+##閣
+##閥
+##閨
+##閩
+##閱
+##閲
+##閹
+##閻
+##閾
+##闆
+##闇
+##闊
+##闌
+##闍
+##闔
+##闕
+##闖
+##闘
+##關
+##闡
+##闢
+##门
+##闪
+##闫
+##闭
+##问
+##闯
+##闰
+##闲
+##间
+##闵
+##闷
+##闸
+##闹
+##闺
+##闻
+##闽
+##闾
+##阀
+##阁
+##阂
+##阅
+##阆
+##阇
+##阈
+##阉
+##阎
+##阐
+##阑
+##阔
+##阕
+##阖
+##阙
+##阚
+##阜
+##队
+##阡
+##阪
+##阮
+##阱
+##防
+##阳
+##阴
+##阵
+##阶
+##阻
+##阿
+##陀
+##陂
+##附
+##际
+##陆
+##陇
+##陈
+##陋
+##陌
+##降
+##限
+##陕
+##陛
+##陝
+##陞
+##陟
+##陡
+##院
+##陣
+##除
+##陨
+##险
+##陪
+##陰
+##陲
+##陳
+##陵
+##陶
+##陷
+##陸
+##険
+##陽
+##隅
+##隆
+##隈
+##隊
+##隋
+##隍
+##階
+##随
+##隐
+##隔
+##隕
+##隘
+##隙
+##際
+##障
+##隠
+##隣
+##隧
+##隨
+##險
+##隱
+##隴
+##隶
+##隸
+##隻
+##隼
+##隽
+##难
+##雀
+##雁
+##雄
+##雅
+##集
+##雇
+##雉
+##雋
+##雌
+##雍
+##雎
+##雏
+##雑
+##雒
+##雕
+##雖
+##雙
+##雛
+##雜
+##雞
+##離
+##難
+##雨
+##雪
+##雯
+##雰
+##雲
+##雳
+##零
+##雷
+##雹
+##電
+##雾
+##需
+##霁
+##霄
+##霆
+##震
+##霈
+##霉
+##霊
+##霍
+##霎
+##霏
+##霑
+##霓
+##霖
+##霜
+##霞
+##霧
+##霭
+##霰
+##露
+##霸
+##霹
+##霽
+##霾
+##靂
+##靄
+##靈
+##青
+##靓
+##靖
+##静
+##靚
+##靛
+##靜
+##非
+##靠
+##靡
+##面
+##靥
+##靦
+##革
+##靳
+##靴
+##靶
+##靼
+##鞅
+##鞋
+##鞍
+##鞏
+##鞑
+##鞘
+##鞠
+##鞣
+##鞦
+##鞭
+##韆
+##韋
+##韌
+##韓
+##韜
+##韦
+##韧
+##韩
+##韬
+##韭
+##音
+##韵
+##韶
+##韻
+##響
+##頁
+##頂
+##頃
+##項
+##順
+##須
+##頌
+##預
+##頑
+##頒
+##頓
+##頗
+##領
+##頜
+##頡
+##頤
+##頫
+##頭
+##頰
+##頷
+##頸
+##頹
+##頻
+##頼
+##顆
+##題
+##額
+##顎
+##顏
+##顔
+##願
+##顛
+##類
+##顧
+##顫
+##顯
+##顱
+##顴
+##页
+##顶
+##顷
+##项
+##顺
+##须
+##顼
+##顽
+##顾
+##顿
+##颁
+##颂
+##预
+##颅
+##领
+##颇
+##颈
+##颉
+##颊
+##颌
+##颍
+##颐
+##频
+##颓
+##颔
+##颖
+##颗
+##题
+##颚
+##颛
+##颜
+##额
+##颞
+##颠
+##颡
+##颢
+##颤
+##颦
+##颧
+##風
+##颯
+##颱
+##颳
+##颶
+##颼
+##飄
+##飆
+##风
+##飒
+##飓
+##飕
+##飘
+##飙
+##飚
+##飛
+##飞
+##食
+##飢
+##飨
+##飩
+##飪
+##飯
+##飲
+##飼
+##飽
+##飾
+##餃
+##餅
+##餉
+##養
+##餌
+##餐
+##餒
+##餓
+##餘
+##餚
+##餛
+##餞
+##餡
+##館
+##餮
+##餵
+##餾
+##饅
+##饈
+##饋
+##饌
+##饍
+##饑
+##饒
+##饕
+##饗
+##饞
+##饥
+##饨
+##饪
+##饬
+##饭
+##饮
+##饯
+##饰
+##饱
+##饲
+##饴
+##饵
+##饶
+##饷
+##饺
+##饼
+##饽
+##饿
+##馀
+##馁
+##馄
+##馅
+##馆
+##馈
+##馋
+##馍
+##馏
+##馒
+##馔
+##首
+##馗
+##香
+##馥
+##馨
+##馬
+##馭
+##馮
+##馳
+##馴
+##駁
+##駄
+##駅
+##駆
+##駐
+##駒
+##駕
+##駛
+##駝
+##駭
+##駱
+##駿
+##騁
+##騎
+##騏
+##験
+##騙
+##騨
+##騰
+##騷
+##驀
+##驅
+##驊
+##驍
+##驒
+##驕
+##驗
+##驚
+##驛
+##驟
+##驢
+##驥
+##马
+##驭
+##驮
+##驯
+##驰
+##驱
+##驳
+##驴
+##驶
+##驷
+##驸
+##驹
+##驻
+##驼
+##驾
+##驿
+##骁
+##骂
+##骄
+##骅
+##骆
+##骇
+##骈
+##骊
+##骋
+##验
+##骏
+##骐
+##骑
+##骗
+##骚
+##骛
+##骜
+##骞
+##骠
+##骡
+##骤
+##骥
+##骧
+##骨
+##骯
+##骰
+##骶
+##骷
+##骸
+##骼
+##髂
+##髅
+##髋
+##髏
+##髒
+##髓
+##體
+##髖
+##高
+##髦
+##髪
+##髮
+##髯
+##髻
+##鬃
+##鬆
+##鬍
+##鬓
+##鬚
+##鬟
+##鬢
+##鬣
+##鬥
+##鬧
+##鬱
+##鬼
+##魁
+##魂
+##魄
+##魅
+##魇
+##魍
+##魏
+##魔
+##魘
+##魚
+##魯
+##魷
+##鮑
+##鮨
+##鮪
+##鮭
+##鮮
+##鯉
+##鯊
+##鯖
+##鯛
+##鯨
+##鯰
+##鯽
+##鰍
+##鰓
+##鰭
+##鰲
+##鰻
+##鰾
+##鱈
+##鱉
+##鱔
+##鱗
+##鱷
+##鱸
+##鱼
+##鱿
+##鲁
+##鲈
+##鲍
+##鲑
+##鲛
+##鲜
+##鲟
+##鲢
+##鲤
+##鲨
+##鲫
+##鲱
+##鲲
+##鲶
+##鲷
+##鲸
+##鳃
+##鳄
+##鳅
+##鳌
+##鳍
+##鳕
+##鳖
+##鳗
+##鳝
+##鳞
+##鳥
+##鳩
+##鳳
+##鳴
+##鳶
+##鴉
+##鴕
+##鴛
+##鴦
+##鴨
+##鴻
+##鴿
+##鵑
+##鵜
+##鵝
+##鵡
+##鵬
+##鵰
+##鵲
+##鶘
+##鶩
+##鶯
+##鶴
+##鷗
+##鷲
+##鷹
+##鷺
+##鸚
+##鸞
+##鸟
+##鸠
+##鸡
+##鸢
+##鸣
+##鸥
+##鸦
+##鸨
+##鸪
+##鸭
+##鸯
+##鸳
+##鸵
+##鸽
+##鸾
+##鸿
+##鹂
+##鹃
+##鹄
+##鹅
+##鹈
+##鹉
+##鹊
+##鹌
+##鹏
+##鹑
+##鹕
+##鹘
+##鹜
+##鹞
+##鹤
+##鹦
+##鹧
+##鹫
+##鹭
+##鹰
+##鹳
+##鹵
+##鹹
+##鹼
+##鹽
+##鹿
+##麂
+##麋
+##麒
+##麓
+##麗
+##麝
+##麟
+##麥
+##麦
+##麩
+##麴
+##麵
+##麸
+##麺
+##麻
+##麼
+##麽
+##麾
+##黃
+##黄
+##黍
+##黎
+##黏
+##黑
+##黒
+##黔
+##默
+##黛
+##黜
+##黝
+##點
+##黠
+##黨
+##黯
+##黴
+##鼋
+##鼎
+##鼐
+##鼓
+##鼠
+##鼬
+##鼹
+##鼻
+##鼾
+##齁
+##齊
+##齋
+##齐
+##齒
+##齡
+##齢
+##齣
+##齦
+##齿
+##龄
+##龅
+##龈
+##龊
+##龋
+##龌
+##龍
+##龐
+##龔
+##龕
+##龙
+##龚
+##龛
+##龜
+##龟
+##︰
+##︱
+##︶
+##︿
+##﹁
+##﹂
+##﹍
+##﹏
+##﹐
+##﹑
+##﹒
+##﹔
+##﹕
+##﹖
+##﹗
+##﹙
+##﹚
+##﹝
+##﹞
+##﹡
+##﹣
+##!
+##"
+###
+##$
+##%
+##&
+##'
+##(
+##)
+##*
+##,
+##-
+##.
+##/
+##:
+##;
+##<
+##?
+##@
+##[
+##\
+##]
+##^
+##_
+##`
+##f
+##h
+##j
+##u
+##w
+##z
+##{
+##}
+##。
+##「
+##」
+##、
+##・
+##ッ
+##ー
+##イ
+##ク
+##シ
+##ス
+##ト
+##ノ
+##フ
+##ラ
+##ル
+##ン
+##゙
+##゚
+## ̄
+##¥
+##👍
+##🔥
+##😂
+##😎
diff --git a/create_pretraining_data.py b/create_pretraining_data.py
new file mode 100644
index 0000000..5340d96
--- /dev/null
+++ b/create_pretraining_data.py
@@ -0,0 +1,469 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Create masked LM/next sentence masked_lm TF examples for BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import random
+import tokenization
+import tensorflow as tf
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("input_file", None,
+ "Input raw text file (or comma-separated list of files).")
+
+flags.DEFINE_string(
+ "output_file", None,
+ "Output TF example file (or comma-separated list of files).")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_bool(
+ "do_whole_word_mask", False,
+ "Whether to use whole word masking rather than per-WordPiece masking.")
+
+flags.DEFINE_integer("max_seq_length", 128, "Maximum sequence length.")
+
+flags.DEFINE_integer("max_predictions_per_seq", 20,
+ "Maximum number of masked LM predictions per sequence.")
+
+flags.DEFINE_integer("random_seed", 12345, "Random seed for data generation.")
+
+flags.DEFINE_integer(
+ "dupe_factor", 10,
+ "Number of times to duplicate the input data (with different masks).")
+
+flags.DEFINE_float("masked_lm_prob", 0.15, "Masked LM probability.")
+
+flags.DEFINE_float(
+ "short_seq_prob", 0.1,
+ "Probability of creating sequences which are shorter than the "
+ "maximum length.")
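+
+# A minimal sketch of a typical invocation; the paths below are placeholders,
+# not files shipped in this repository:
+#
+#   python create_pretraining_data.py \
+#     --input_file=./sample_text.txt \
+#     --output_file=/tmp/tf_examples.tfrecord \
+#     --vocab_file=./vocab.txt \
+#     --do_lower_case=True \
+#     --max_seq_length=128 \
+#     --max_predictions_per_seq=20 \
+#     --masked_lm_prob=0.15 \
+#     --random_seed=12345 \
+#     --dupe_factor=10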
+
+
+class TrainingInstance(object):
+ """A single training instance (sentence pair)."""
+
+ def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels,
+ is_random_next):
+ self.tokens = tokens
+ self.segment_ids = segment_ids
+ self.is_random_next = is_random_next
+ self.masked_lm_positions = masked_lm_positions
+ self.masked_lm_labels = masked_lm_labels
+
+ def __str__(self):
+ s = ""
+ s += "tokens: %s\n" % (" ".join(
+ [tokenization.printable_text(x) for x in self.tokens]))
+ s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
+ s += "is_random_next: %s\n" % self.is_random_next
+ s += "masked_lm_positions: %s\n" % (" ".join(
+ [str(x) for x in self.masked_lm_positions]))
+ s += "masked_lm_labels: %s\n" % (" ".join(
+ [tokenization.printable_text(x) for x in self.masked_lm_labels]))
+ s += "\n"
+ return s
+
+ def __repr__(self):
+ return self.__str__()
+
+
+def write_instance_to_example_files(instances, tokenizer, max_seq_length,
+ max_predictions_per_seq, output_files):
+ """Create TF example files from `TrainingInstance`s."""
+ writers = []
+ for output_file in output_files:
+ writers.append(tf.python_io.TFRecordWriter(output_file))
+
+ writer_index = 0
+
+ total_written = 0
+ for (inst_index, instance) in enumerate(instances):
+ input_ids = tokenizer.convert_tokens_to_ids(instance.tokens)
+ input_mask = [1] * len(input_ids)
+ segment_ids = list(instance.segment_ids)
+ assert len(input_ids) <= max_seq_length
+
+ while len(input_ids) < max_seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ segment_ids.append(0)
+
+ assert len(input_ids) == max_seq_length
+ assert len(input_mask) == max_seq_length
+ assert len(segment_ids) == max_seq_length
+
+ masked_lm_positions = list(instance.masked_lm_positions)
+ masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels)
+ masked_lm_weights = [1.0] * len(masked_lm_ids)
+
+ while len(masked_lm_positions) < max_predictions_per_seq:
+ masked_lm_positions.append(0)
+ masked_lm_ids.append(0)
+ masked_lm_weights.append(0.0)
+
+ next_sentence_label = 1 if instance.is_random_next else 0
+
+ features = collections.OrderedDict()
+ features["input_ids"] = create_int_feature(input_ids)
+ features["input_mask"] = create_int_feature(input_mask)
+ features["segment_ids"] = create_int_feature(segment_ids)
+ features["masked_lm_positions"] = create_int_feature(masked_lm_positions)
+ features["masked_lm_ids"] = create_int_feature(masked_lm_ids)
+ features["masked_lm_weights"] = create_float_feature(masked_lm_weights)
+ features["next_sentence_labels"] = create_int_feature([next_sentence_label])
+
+ tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+
+ writers[writer_index].write(tf_example.SerializeToString())
+ writer_index = (writer_index + 1) % len(writers)
+
+ total_written += 1
+
+ if inst_index < 20:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in instance.tokens]))
+
+ for feature_name in features.keys():
+ feature = features[feature_name]
+ values = []
+ if feature.int64_list.value:
+ values = feature.int64_list.value
+ elif feature.float_list.value:
+ values = feature.float_list.value
+ tf.logging.info(
+ "%s: %s" % (feature_name, " ".join([str(x) for x in values])))
+
+ for writer in writers:
+ writer.close()
+
+ tf.logging.info("Wrote %d total instances", total_written)
+
+
+def create_int_feature(values):
+ feature = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
+ return feature
+
+
+def create_float_feature(values):
+ feature = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
+ return feature
+
+
+def create_training_instances(input_files, tokenizer, max_seq_length,
+ dupe_factor, short_seq_prob, masked_lm_prob,
+ max_predictions_per_seq, rng):
+ """Create `TrainingInstance`s from raw text."""
+ all_documents = [[]]
+
+ # Input file format:
+ # (1) One sentence per line. These should ideally be actual sentences, not
+ # entire paragraphs or arbitrary spans of text. (Because we use the
+ # sentence boundaries for the "next sentence prediction" task).
+ # (2) Blank lines between documents. Document boundaries are needed so
+ # that the "next sentence prediction" task doesn't span between documents.
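+  #
+  # A made-up illustration of that layout (two short documents separated by a
+  # blank line):
+  #
+  #   This is the first sentence of document one.
+  #   Here is the second sentence of document one.
+  #
+  #   Document two starts here with its own first sentence.
+  #   It ends with this one.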
+ for input_file in input_files:
+ with tf.gfile.GFile(input_file, "r") as reader:
+ while True:
+ line = tokenization.convert_to_unicode(reader.readline())
+ if not line:
+ break
+ line = line.strip()
+
+ # Empty lines are used as document delimiters
+ if not line:
+ all_documents.append([])
+ tokens = tokenizer.tokenize(line)
+ if tokens:
+ all_documents[-1].append(tokens)
+
+ # Remove empty documents
+ all_documents = [x for x in all_documents if x]
+ rng.shuffle(all_documents)
+
+ vocab_words = list(tokenizer.vocab.keys())
+ instances = []
+ for _ in range(dupe_factor):
+ for document_index in range(len(all_documents)):
+ instances.extend(
+ create_instances_from_document(
+ all_documents, document_index, max_seq_length, short_seq_prob,
+ masked_lm_prob, max_predictions_per_seq, vocab_words, rng))
+
+ rng.shuffle(instances)
+ return instances
+
+
+def create_instances_from_document(
+ all_documents, document_index, max_seq_length, short_seq_prob,
+ masked_lm_prob, max_predictions_per_seq, vocab_words, rng):
+ """Creates `TrainingInstance`s for a single document."""
+ document = all_documents[document_index]
+
+ # Account for [CLS], [SEP], [SEP]
+ max_num_tokens = max_seq_length - 3
+
+ # We *usually* want to fill up the entire sequence since we are padding
+ # to `max_seq_length` anyways, so short sequences are generally wasted
+ # computation. However, we *sometimes*
+ # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter
+ # sequences to minimize the mismatch between pre-training and fine-tuning.
+ # The `target_seq_length` is just a rough target however, whereas
+ # `max_seq_length` is a hard limit.
+ target_seq_length = max_num_tokens
+ if rng.random() < short_seq_prob:
+ target_seq_length = rng.randint(2, max_num_tokens)
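+  # For instance, with the default max_seq_length of 128, max_num_tokens is
+  # 125, so roughly short_seq_prob (10%) of the time the target length is
+  # drawn uniformly from [2, 125] instead of using the full 125.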
+
+ # We DON'T just concatenate all of the tokens from a document into a long
+ # sequence and choose an arbitrary split point because this would make the
+ # next sentence prediction task too easy. Instead, we split the input into
+ # segments "A" and "B" based on the actual "sentences" provided by the user
+ # input.
+ instances = []
+ current_chunk = []
+ current_length = 0
+ i = 0
+ while i < len(document):
+ segment = document[i]
+ current_chunk.append(segment)
+ current_length += len(segment)
+ if i == len(document) - 1 or current_length >= target_seq_length:
+ if current_chunk:
+ # `a_end` is how many segments from `current_chunk` go into the `A`
+ # (first) sentence.
+ a_end = 1
+ if len(current_chunk) >= 2:
+ a_end = rng.randint(1, len(current_chunk) - 1)
+
+ tokens_a = []
+ for j in range(a_end):
+ tokens_a.extend(current_chunk[j])
+
+ tokens_b = []
+ # Random next
+ is_random_next = False
+ if len(current_chunk) == 1 or rng.random() < 0.5:
+ is_random_next = True
+ target_b_length = target_seq_length - len(tokens_a)
+
+ # This should rarely go for more than one iteration for large
+ # corpora. However, just to be careful, we try to make sure that
+ # the random document is not the same as the document
+ # we're processing.
+ for _ in range(10):
+ random_document_index = rng.randint(0, len(all_documents) - 1)
+ if random_document_index != document_index:
+ break
+
+ random_document = all_documents[random_document_index]
+ random_start = rng.randint(0, len(random_document) - 1)
+ for j in range(random_start, len(random_document)):
+ tokens_b.extend(random_document[j])
+ if len(tokens_b) >= target_b_length:
+ break
+ # We didn't actually use these segments so we "put them back" so
+ # they don't go to waste.
+ num_unused_segments = len(current_chunk) - a_end
+ i -= num_unused_segments
+ # Actual next
+ else:
+ is_random_next = False
+ for j in range(a_end, len(current_chunk)):
+ tokens_b.extend(current_chunk[j])
+ truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng)
+
+ assert len(tokens_a) >= 1
+ assert len(tokens_b) >= 1
+
+ tokens = []
+ segment_ids = []
+ tokens.append("[CLS]")
+ segment_ids.append(0)
+ for token in tokens_a:
+ tokens.append(token)
+ segment_ids.append(0)
+
+ tokens.append("[SEP]")
+ segment_ids.append(0)
+
+ for token in tokens_b:
+ tokens.append(token)
+ segment_ids.append(1)
+ tokens.append("[SEP]")
+ segment_ids.append(1)
+
+ (tokens, masked_lm_positions,
+ masked_lm_labels) = create_masked_lm_predictions(
+ tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng)
+ instance = TrainingInstance(
+ tokens=tokens,
+ segment_ids=segment_ids,
+ is_random_next=is_random_next,
+ masked_lm_positions=masked_lm_positions,
+ masked_lm_labels=masked_lm_labels)
+ instances.append(instance)
+ current_chunk = []
+ current_length = 0
+ i += 1
+
+ return instances
+
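+# Editor's note (illustrative sketch, not part of the original script): for a
+# toy document of three sentences, one instance produced above -- assuming
+# a_end == 2 and the 50% "actual next" branch -- would look like:
+#
+#   tokens:         [CLS] the cat sat [SEP] on the mat [SEP]
+#   segment_ids:      0    0   0   0    0    1   1   1    1
+#   is_random_next:  False
+#
+# with roughly masked_lm_prob of the non-special positions already replaced
+# by create_masked_lm_predictions before the TrainingInstance is built.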
+
+MaskedLmInstance = collections.namedtuple("MaskedLmInstance",
+ ["index", "label"])
+
+
+def create_masked_lm_predictions(tokens, masked_lm_prob,
+ max_predictions_per_seq, vocab_words, rng):
+ """Creates the predictions for the masked LM objective."""
+
+ cand_indexes = []
+ for (i, token) in enumerate(tokens):
+ if token == "[CLS]" or token == "[SEP]":
+ continue
+ # Whole Word Masking means that we mask all of the wordpieces
+ # corresponding to an original word. When a word has been split into
+ # WordPieces, the first token does not have any marker and any subsequent
+ # tokens are prefixed with ##. So whenever we see the ## prefix, we
+ # append the token to the previous set of word indexes.
+ #
+ # Note that Whole Word Masking does *not* change the training code
+ # at all -- we still predict each WordPiece independently, softmaxed
+ # over the entire vocabulary.
+ if (FLAGS.do_whole_word_mask and len(cand_indexes) >= 1 and
+ token.startswith("##")):
+ cand_indexes[-1].append(i)
+ else:
+ cand_indexes.append([i])
+
+ rng.shuffle(cand_indexes)
+
+ output_tokens = list(tokens)
+
+ num_to_predict = min(max_predictions_per_seq,
+ max(1, int(round(len(tokens) * masked_lm_prob))))
+
+ masked_lms = []
+ covered_indexes = set()
+ for index_set in cand_indexes:
+ if len(masked_lms) >= num_to_predict:
+ break
+ # If adding a whole-word mask would exceed the maximum number of
+ # predictions, then just skip this candidate.
+ if len(masked_lms) + len(index_set) > num_to_predict:
+ continue
+ is_any_index_covered = False
+ for index in index_set:
+ if index in covered_indexes:
+ is_any_index_covered = True
+ break
+ if is_any_index_covered:
+ continue
+ for index in index_set:
+ covered_indexes.add(index)
+
+ masked_token = None
+ # 80% of the time, replace with [MASK]
+ if rng.random() < 0.8:
+ masked_token = "[MASK]"
+ else:
+ # 10% of the time, keep original
+ if rng.random() < 0.5:
+ masked_token = tokens[index]
+ # 10% of the time, replace with random word
+ else:
+ masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]
+
+ output_tokens[index] = masked_token
+
+ masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))
+ assert len(masked_lms) <= num_to_predict
+ masked_lms = sorted(masked_lms, key=lambda x: x.index)
+
+ masked_lm_positions = []
+ masked_lm_labels = []
+ for p in masked_lms:
+ masked_lm_positions.append(p.index)
+ masked_lm_labels.append(p.label)
+
+ return (output_tokens, masked_lm_positions, masked_lm_labels)
+
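+# Editor's note -- an illustrative usage sketch (values are hypothetical and it
+# is assumed the module's flags have already been parsed):
+#
+#   rng = random.Random(12345)
+#   tokens = ["[CLS]", "the", "dog", "is", "hair", "##y", "[SEP]"]
+#   vocab = ["the", "dog", "cat", "is", "hair", "##y"]
+#   out_tokens, positions, labels = create_masked_lm_predictions(
+#       tokens, masked_lm_prob=0.15, max_predictions_per_seq=2,
+#       vocab_words=vocab, rng=rng)
+#
+# Here num_to_predict == 1 (round(7 * 0.15)), so a single non-special position
+# is chosen; 80% of the time it becomes "[MASK]", 10% of the time it is left
+# unchanged, and 10% of the time it is replaced by a random vocabulary word,
+# while `positions`/`labels` record where the original token was and what it was.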
+
+def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng):
+ """Truncates a pair of sequences to a maximum sequence length."""
+ while True:
+ total_length = len(tokens_a) + len(tokens_b)
+ if total_length <= max_num_tokens:
+ break
+
+ trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
+ assert len(trunc_tokens) >= 1
+
+ # We want to sometimes truncate from the front and sometimes from the
+ # back to add more randomness and avoid biases.
+ if rng.random() < 0.5:
+ del trunc_tokens[0]
+ else:
+ trunc_tokens.pop()
+
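+# Editor's note (illustrative sketch): with max_num_tokens == 5,
+#
+#   tokens_a = ["a1", "a2", "a3", "a4"]
+#   tokens_b = ["b1", "b2", "b3"]
+#   truncate_seq_pair(tokens_a, tokens_b, 5, random.Random(0))
+#
+# removes two tokens in total, always from whichever list is currently longer
+# (ties go to tokens_b) and randomly from its front or back, e.g. leaving
+# tokens_a == ["a1", "a2", "a3"] and tokens_b == ["b2", "b3"].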
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+ input_files = []
+ for input_pattern in FLAGS.input_file.split(","):
+ input_files.extend(tf.gfile.Glob(input_pattern))
+
+ tf.logging.info("*** Reading from input files ***")
+ for input_file in input_files:
+ tf.logging.info(" %s", input_file)
+
+ rng = random.Random(FLAGS.random_seed)
+ instances = create_training_instances(
+ input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor,
+ FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq,
+ rng)
+
+ output_files = FLAGS.output_file.split(",")
+ tf.logging.info("*** Writing to output files ***")
+ for output_file in output_files:
+ tf.logging.info(" %s", output_file)
+
+ write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length,
+ FLAGS.max_predictions_per_seq, output_files)
+
+
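+# Editor's note -- an example invocation (sketch only; the file paths are
+# hypothetical and the usual create_pretraining_data.py filename is assumed,
+# while the flag names come from the definitions above):
+#
+#   python create_pretraining_data.py \
+#     --input_file=./sample_text.txt \
+#     --output_file=./tf_examples.tfrecord \
+#     --vocab_file=./vocab.txt \
+#     --do_lower_case=True \
+#     --max_seq_length=128 \
+#     --max_predictions_per_seq=20 \
+#     --masked_lm_prob=0.15 \
+#     --random_seed=12345 \
+#     --dupe_factor=5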
+if __name__ == "__main__":
+ flags.mark_flag_as_required("input_file")
+ flags.mark_flag_as_required("output_file")
+ flags.mark_flag_as_required("vocab_file")
+ tf.app.run()
diff --git a/dealing_dataset.py b/dealing_dataset.py
new file mode 100644
index 0000000..d8db627
--- /dev/null
+++ b/dealing_dataset.py
@@ -0,0 +1,49 @@
+import sqlite3
+
+conn = sqlite3.connect(r"nlpdata.db")
+
+
+def create_dataset_ep(table):
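+ """Reads up to 20 rows (eid, tag, content) from `table` in nlpdata.db and
+ returns [eid, label, content] triples, mapping tag "4"/"5" -> 2,
+ "1"/"2" -> 0, and anything else -> 1."""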
+ cursor = conn.cursor()
+ sql = "select * from " + table + " LIMIT 20"
+ cursor.execute(sql)
+ conn.commit()
+
+ dataset = []
+
+ for row in cursor:
+ eid = row[0]
+ tag = row[1]
+ content = row[2]
+ if tag == "5" or tag == "4":
+ dataset.append([eid, 2, content])
+ print(eid, 2, content)
+ elif tag == "1" or tag == "2":
+ dataset.append([eid, 0, content])
+ print(eid, 0, content)
+ else:
+ dataset.append([eid, 1, content])
+ print(eid, 1, content)
+ return dataset
+
+
+def create_dataset_pdt():
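+ """Reads every row (stnid, text) from the predict_data table in bptdata.db
+ and returns [stnid, 0, text] triples (the label column is fixed to 0)."""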
+ conn_pdt = sqlite3.connect(r".\bptdata.db")
+ cursor = conn_pdt.cursor()
+ sql = "select * from " + "predict_data"
+ cursor.execute(sql)
+ conn_pdt.commit()
+
+ dataset = []
+
+ for row in cursor:
+ stnid = row[0]
+ text = row[1]
+ dataset.append([stnid, 0, text])
+ print(stnid, 0, text)
+
+ return dataset
+
+
+if __name__ == '__main__':
+ print(create_dataset_ep("amki_test"))
\ No newline at end of file
diff --git a/extract_features.py b/extract_features.py
new file mode 100644
index 0000000..60e3830
--- /dev/null
+++ b/extract_features.py
@@ -0,0 +1,419 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Extract pre-computed feature vectors from BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import codecs
+import collections
+import json
+import re
+
+import modeling
+import tokenization
+import tensorflow as tf
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("input_file", None, "")
+
+flags.DEFINE_string("output_file", None, "")
+
+flags.DEFINE_string("layers", "-1,-2,-3,-4", "")
+
+flags.DEFINE_string(
+ "bert_config_file", None,
+ "The config json file corresponding to the pre-trained BERT model. "
+ "This specifies the model architecture.")
+
+flags.DEFINE_integer(
+ "max_seq_length", 128,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded.")
+
+flags.DEFINE_string(
+ "init_checkpoint", None,
+ "Initial checkpoint (usually from a pre-trained BERT model).")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_integer("batch_size", 32, "Batch size for predictions.")
+
+flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
+
+flags.DEFINE_string("master", None,
+ "If using a TPU, the address of the master.")
+
+flags.DEFINE_integer(
+ "num_tpu_cores", 8,
+ "Only used if `use_tpu` is True. Total number of TPU cores to use.")
+
+flags.DEFINE_bool(
+ "use_one_hot_embeddings", False,
+ "If True, tf.one_hot will be used for embedding lookups, otherwise "
+ "tf.nn.embedding_lookup will be used. On TPUs, this should be True "
+ "since it is much faster.")
+
+
+class InputExample(object):
+
+ def __init__(self, unique_id, text_a, text_b):
+ self.unique_id = unique_id
+ self.text_a = text_a
+ self.text_b = text_b
+
+
+class InputFeatures(object):
+ """A single set of features of data."""
+
+ def __init__(self, unique_id, tokens, input_ids, input_mask, input_type_ids):
+ self.unique_id = unique_id
+ self.tokens = tokens
+ self.input_ids = input_ids
+ self.input_mask = input_mask
+ self.input_type_ids = input_type_ids
+
+
+def input_fn_builder(features, seq_length):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ all_unique_ids = []
+ all_input_ids = []
+ all_input_mask = []
+ all_input_type_ids = []
+
+ for feature in features:
+ all_unique_ids.append(feature.unique_id)
+ all_input_ids.append(feature.input_ids)
+ all_input_mask.append(feature.input_mask)
+ all_input_type_ids.append(feature.input_type_ids)
+
+ def input_fn(params):
+ """The actual input function."""
+ batch_size = params["batch_size"]
+
+ num_examples = len(features)
+
+ # This is for demo purposes and does NOT scale to large data sets. We do
+ # not use Dataset.from_generator() because that uses tf.py_func which is
+ # not TPU compatible. The right way to load data is with TFRecordReader.
+ d = tf.data.Dataset.from_tensor_slices({
+ "unique_ids":
+ tf.constant(all_unique_ids, shape=[num_examples], dtype=tf.int32),
+ "input_ids":
+ tf.constant(
+ all_input_ids, shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "input_mask":
+ tf.constant(
+ all_input_mask,
+ shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "input_type_ids":
+ tf.constant(
+ all_input_type_ids,
+ shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ })
+
+ d = d.batch(batch_size=batch_size, drop_remainder=False)
+ return d
+
+ return input_fn
+
+
+def model_fn_builder(bert_config, init_checkpoint, layer_indexes, use_tpu,
+ use_one_hot_embeddings):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ unique_ids = features["unique_ids"]
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ input_type_ids = features["input_type_ids"]
+
+ model = modeling.BertModel(
+ config=bert_config,
+ is_training=False,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=input_type_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ if mode != tf.estimator.ModeKeys.PREDICT:
+ raise ValueError("Only PREDICT modes are supported: %s" % (mode))
+
+ tvars = tf.trainable_variables()
+ scaffold_fn = None
+ (assignment_map,
+ initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
+ tvars, init_checkpoint)
+ if use_tpu:
+
+ def tpu_scaffold():
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+ return tf.train.Scaffold()
+
+ scaffold_fn = tpu_scaffold
+ else:
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+
+ tf.logging.info("**** Trainable Variables ****")
+ for var in tvars:
+ init_string = ""
+ if var.name in initialized_variable_names:
+ init_string = ", *INIT_FROM_CKPT*"
+ tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+ init_string)
+
+ all_layers = model.get_all_encoder_layers()
+
+ predictions = {
+ "unique_id": unique_ids,
+ }
+
+ for (i, layer_index) in enumerate(layer_indexes):
+ predictions["layer_output_%d" % i] = all_layers[layer_index]
+
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
+ return output_spec
+
+ return model_fn
+
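+# Editor's note (illustrative): with the default --layers="-1,-2,-3,-4", the
+# predictions dict above carries the last four encoder layers as
+# "layer_output_0" .. "layer_output_3", each of shape
+# [batch_size, seq_length, hidden_size].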
+
+def convert_examples_to_features(examples, seq_length, tokenizer):
+ """Loads a data file into a list of `InputBatch`s."""
+
+ features = []
+ for (ex_index, example) in enumerate(examples):
+ tokens_a = tokenizer.tokenize(example.text_a)
+
+ tokens_b = None
+ if example.text_b:
+ tokens_b = tokenizer.tokenize(example.text_b)
+
+ if tokens_b:
+ # Modifies `tokens_a` and `tokens_b` in place so that the total
+ # length is less than the specified length.
+ # Account for [CLS], [SEP], [SEP] with "- 3"
+ _truncate_seq_pair(tokens_a, tokens_b, seq_length - 3)
+ else:
+ # Account for [CLS] and [SEP] with "- 2"
+ if len(tokens_a) > seq_length - 2:
+ tokens_a = tokens_a[0:(seq_length - 2)]
+
+ # The convention in BERT is:
+ # (a) For sequence pairs:
+ # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
+ # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1
+ # (b) For single sequences:
+ # tokens: [CLS] the dog is hairy . [SEP]
+ # type_ids: 0 0 0 0 0 0 0
+ #
+ # Where "type_ids" are used to indicate whether this is the first
+ # sequence or the second sequence. The embedding vectors for `type=0` and
+ # `type=1` were learned during pre-training and are added to the wordpiece
+ # embedding vector (and position vector). This is not *strictly* necessary
+ # since the [SEP] token unambiguously separates the sequences, but it makes
+ # it easier for the model to learn the concept of sequences.
+ #
+ # For classification tasks, the first vector (corresponding to [CLS]) is
+ # used as as the "sentence vector". Note that this only makes sense because
+ # the entire model is fine-tuned.
+ tokens = []
+ input_type_ids = []
+ tokens.append("[CLS]")
+ input_type_ids.append(0)
+ for token in tokens_a:
+ tokens.append(token)
+ input_type_ids.append(0)
+ tokens.append("[SEP]")
+ input_type_ids.append(0)
+
+ if tokens_b:
+ for token in tokens_b:
+ tokens.append(token)
+ input_type_ids.append(1)
+ tokens.append("[SEP]")
+ input_type_ids.append(1)
+
+ input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+ # The mask has 1 for real tokens and 0 for padding tokens. Only real
+ # tokens are attended to.
+ input_mask = [1] * len(input_ids)
+
+ # Zero-pad up to the sequence length.
+ while len(input_ids) < seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ input_type_ids.append(0)
+
+ assert len(input_ids) == seq_length
+ assert len(input_mask) == seq_length
+ assert len(input_type_ids) == seq_length
+
+ if ex_index < 5:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("unique_id: %s" % (example.unique_id))
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in tokens]))
+ tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ tf.logging.info(
+ "input_type_ids: %s" % " ".join([str(x) for x in input_type_ids]))
+
+ features.append(
+ InputFeatures(
+ unique_id=example.unique_id,
+ tokens=tokens,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ input_type_ids=input_type_ids))
+ return features
+
+
+def _truncate_seq_pair(tokens_a, tokens_b, max_length):
+ """Truncates a sequence pair in place to the maximum length."""
+
+ # This is a simple heuristic which will always truncate the longer sequence
+ # one token at a time. This makes more sense than truncating an equal percent
+ # of tokens from each, since if one sequence is very short then each token
+ # that's truncated likely contains more information than a longer sequence.
+ while True:
+ total_length = len(tokens_a) + len(tokens_b)
+ if total_length <= max_length:
+ break
+ if len(tokens_a) > len(tokens_b):
+ tokens_a.pop()
+ else:
+ tokens_b.pop()
+
+
+def read_examples(input_file):
+ """Read a list of `InputExample`s from an input file."""
+ examples = []
+ unique_id = 0
+ with tf.gfile.GFile(input_file, "r") as reader:
+ while True:
+ line = tokenization.convert_to_unicode(reader.readline())
+ if not line:
+ break
+ line = line.strip()
+ text_a = None
+ text_b = None
+ m = re.match(r"^(.*) \|\|\| (.*)$", line)
+ if m is None:
+ text_a = line
+ else:
+ text_a = m.group(1)
+ text_b = m.group(2)
+ examples.append(
+ InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
+ unique_id += 1
+ return examples
+
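+# Editor's note (illustrative): each line of the input file is either a single
+# sentence or a " ||| "-separated pair, e.g.
+#
+#   Who was Jim Henson ? ||| Jim Henson was a puppeteer
+#
+# which read_examples turns into InputExample(unique_id=0,
+# text_a="Who was Jim Henson ?", text_b="Jim Henson was a puppeteer").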
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ layer_indexes = [int(x) for x in FLAGS.layers.split(",")]
+
+ bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ run_config = tf.contrib.tpu.RunConfig(
+ master=FLAGS.master,
+ tpu_config=tf.contrib.tpu.TPUConfig(
+ num_shards=FLAGS.num_tpu_cores,
+ per_host_input_for_training=is_per_host))
+
+ examples = read_examples(FLAGS.input_file)
+
+ features = convert_examples_to_features(
+ examples=examples, seq_length=FLAGS.max_seq_length, tokenizer=tokenizer)
+
+ unique_id_to_feature = {}
+ for feature in features:
+ unique_id_to_feature[feature.unique_id] = feature
+
+ model_fn = model_fn_builder(
+ bert_config=bert_config,
+ init_checkpoint=FLAGS.init_checkpoint,
+ layer_indexes=layer_indexes,
+ use_tpu=FLAGS.use_tpu,
+ use_one_hot_embeddings=FLAGS.use_one_hot_embeddings)
+
+ # If TPU is not available, this will fall back to normal Estimator on CPU
+ # or GPU.
+ estimator = tf.contrib.tpu.TPUEstimator(
+ use_tpu=FLAGS.use_tpu,
+ model_fn=model_fn,
+ config=run_config,
+ predict_batch_size=FLAGS.batch_size)
+
+ input_fn = input_fn_builder(
+ features=features, seq_length=FLAGS.max_seq_length)
+
+ with codecs.getwriter("utf-8")(tf.gfile.Open(FLAGS.output_file,
+ "w")) as writer:
+ for result in estimator.predict(input_fn, yield_single_examples=True):
+ unique_id = int(result["unique_id"])
+ feature = unique_id_to_feature[unique_id]
+ output_json = collections.OrderedDict()
+ output_json["linex_index"] = unique_id
+ all_features = []
+ for (i, token) in enumerate(feature.tokens):
+ all_layers = []
+ for (j, layer_index) in enumerate(layer_indexes):
+ layer_output = result["layer_output_%d" % j]
+ layers = collections.OrderedDict()
+ layers["index"] = layer_index
+ layers["values"] = [
+ round(float(x), 6) for x in layer_output[i:(i + 1)].flat
+ ]
+ all_layers.append(layers)
+ features = collections.OrderedDict()
+ features["token"] = token
+ features["layers"] = all_layers
+ all_features.append(features)
+ output_json["features"] = all_features
+ writer.write(json.dumps(output_json) + "\n")
+
+
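+# Editor's note -- an example invocation (sketch only; $BERT_BASE_DIR and the
+# file paths are hypothetical):
+#
+#   python extract_features.py \
+#     --input_file=./input.txt \
+#     --output_file=./output.jsonl \
+#     --vocab_file=$BERT_BASE_DIR/vocab.txt \
+#     --bert_config_file=$BERT_BASE_DIR/bert_config.json \
+#     --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
+#     --layers=-1,-2,-3,-4 \
+#     --max_seq_length=128 \
+#     --batch_size=8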
+if __name__ == "__main__":
+ flags.mark_flag_as_required("input_file")
+ flags.mark_flag_as_required("vocab_file")
+ flags.mark_flag_as_required("bert_config_file")
+ flags.mark_flag_as_required("init_checkpoint")
+ flags.mark_flag_as_required("output_file")
+ tf.app.run()
diff --git a/modeling.py b/modeling.py
new file mode 100644
index 0000000..fed5259
--- /dev/null
+++ b/modeling.py
@@ -0,0 +1,986 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The main BERT model and related functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import copy
+import json
+import math
+import re
+import numpy as np
+import six
+import tensorflow as tf
+
+
+class BertConfig(object):
+ """Configuration for `BertModel`."""
+
+ def __init__(self,
+ vocab_size,
+ hidden_size=768,
+ num_hidden_layers=12,
+ num_attention_heads=12,
+ intermediate_size=3072,
+ hidden_act="gelu",
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ max_position_embeddings=512,
+ type_vocab_size=16,
+ initializer_range=0.02):
+ """Constructs BertConfig.
+
+ Args:
+ vocab_size: Vocabulary size of `inputs_ids` in `BertModel`.
+ hidden_size: Size of the encoder layers and the pooler layer.
+ num_hidden_layers: Number of hidden layers in the Transformer encoder.
+ num_attention_heads: Number of attention heads for each attention layer in
+ the Transformer encoder.
+ intermediate_size: The size of the "intermediate" (i.e., feed-forward)
+ layer in the Transformer encoder.
+ hidden_act: The non-linear activation function (function or string) in the
+ encoder and pooler.
+ hidden_dropout_prob: The dropout probability for all fully connected
+ layers in the embeddings, encoder, and pooler.
+ attention_probs_dropout_prob: The dropout ratio for the attention
+ probabilities.
+ max_position_embeddings: The maximum sequence length that this model might
+ ever be used with. Typically set this to something large just in case
+ (e.g., 512 or 1024 or 2048).
+ type_vocab_size: The vocabulary size of the `token_type_ids` passed into
+ `BertModel`.
+ initializer_range: The stdev of the truncated_normal_initializer for
+ initializing all weight matrices.
+ """
+ self.vocab_size = vocab_size
+ self.hidden_size = hidden_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.hidden_act = hidden_act
+ self.intermediate_size = intermediate_size
+ self.hidden_dropout_prob = hidden_dropout_prob
+ self.attention_probs_dropout_prob = attention_probs_dropout_prob
+ self.max_position_embeddings = max_position_embeddings
+ self.type_vocab_size = type_vocab_size
+ self.initializer_range = initializer_range
+
+ @classmethod
+ def from_dict(cls, json_object):
+ """Constructs a `BertConfig` from a Python dictionary of parameters."""
+ config = BertConfig(vocab_size=None)
+ for (key, value) in six.iteritems(json_object):
+ config.__dict__[key] = value
+ return config
+
+ @classmethod
+ def from_json_file(cls, json_file):
+ """Constructs a `BertConfig` from a json file of parameters."""
+ with tf.gfile.GFile(json_file, "r") as reader:
+ text = reader.read()
+ return cls.from_dict(json.loads(text))
+
+ def to_dict(self):
+ """Serializes this instance to a Python dictionary."""
+ output = copy.deepcopy(self.__dict__)
+ return output
+
+ def to_json_string(self):
+ """Serializes this instance to a JSON string."""
+ return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
+
+
+class BertModel(object):
+ """BERT model ("Bidirectional Encoder Representations from Transformers").
+
+ Example usage:
+
+ ```python
+ # Already been converted into WordPiece token ids
+ input_ids = tf.constant([[31, 51, 99], [15, 5, 0]])
+ input_mask = tf.constant([[1, 1, 1], [1, 1, 0]])
+ token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]])
+
+ config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
+ num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
+
+ model = modeling.BertModel(config=config, is_training=True,
+ input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids)
+
+ label_embeddings = tf.get_variable(...)
+ pooled_output = model.get_pooled_output()
+ logits = tf.matmul(pooled_output, label_embeddings)
+ ...
+ ```
+ """
+
+ def __init__(self,
+ config,
+ is_training,
+ input_ids,
+ input_mask=None,
+ token_type_ids=None,
+ use_one_hot_embeddings=False,
+ scope=None):
+ """Constructor for BertModel.
+
+ Args:
+ config: `BertConfig` instance.
+ is_training: bool. true for training model, false for eval model. Controls
+ whether dropout will be applied.
+ input_ids: int32 Tensor of shape [batch_size, seq_length].
+ input_mask: (optional) int32 Tensor of shape [batch_size, seq_length].
+ token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
+ use_one_hot_embeddings: (optional) bool. Whether to use one-hot word
+ embeddings or tf.embedding_lookup() for the word embeddings.
+ scope: (optional) variable scope. Defaults to "bert".
+
+ Raises:
+ ValueError: The config is invalid or one of the input tensor shapes
+ is invalid.
+ """
+ config = copy.deepcopy(config)
+ if not is_training:
+ config.hidden_dropout_prob = 0.0
+ config.attention_probs_dropout_prob = 0.0
+
+ input_shape = get_shape_list(input_ids, expected_rank=2)
+ batch_size = input_shape[0]
+ seq_length = input_shape[1]
+
+ if input_mask is None:
+ input_mask = tf.ones(shape=[batch_size, seq_length], dtype=tf.int32)
+
+ if token_type_ids is None:
+ token_type_ids = tf.zeros(shape=[batch_size, seq_length], dtype=tf.int32)
+
+ with tf.variable_scope(scope, default_name="bert"):
+ with tf.variable_scope("embeddings"):
+ # Perform embedding lookup on the word ids.
+ (self.embedding_output, self.embedding_table) = embedding_lookup(
+ input_ids=input_ids,
+ vocab_size=config.vocab_size,
+ embedding_size=config.hidden_size,
+ initializer_range=config.initializer_range,
+ word_embedding_name="word_embeddings",
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ # Add positional embeddings and token type embeddings, then layer
+ # normalize and perform dropout.
+ self.embedding_output = embedding_postprocessor(
+ input_tensor=self.embedding_output,
+ use_token_type=True,
+ token_type_ids=token_type_ids,
+ token_type_vocab_size=config.type_vocab_size,
+ token_type_embedding_name="token_type_embeddings",
+ use_position_embeddings=True,
+ position_embedding_name="position_embeddings",
+ initializer_range=config.initializer_range,
+ max_position_embeddings=config.max_position_embeddings,
+ dropout_prob=config.hidden_dropout_prob)
+
+ with tf.variable_scope("encoder"):
+ # This converts a 2D mask of shape [batch_size, seq_length] to a 3D
+ # mask of shape [batch_size, seq_length, seq_length] which is used
+ # for the attention scores.
+ attention_mask = create_attention_mask_from_input_mask(
+ input_ids, input_mask)
+
+ # Run the stacked transformer.
+ # `sequence_output` shape = [batch_size, seq_length, hidden_size].
+ self.all_encoder_layers = transformer_model(
+ input_tensor=self.embedding_output,
+ attention_mask=attention_mask,
+ hidden_size=config.hidden_size,
+ num_hidden_layers=config.num_hidden_layers,
+ num_attention_heads=config.num_attention_heads,
+ intermediate_size=config.intermediate_size,
+ intermediate_act_fn=get_activation(config.hidden_act),
+ hidden_dropout_prob=config.hidden_dropout_prob,
+ attention_probs_dropout_prob=config.attention_probs_dropout_prob,
+ initializer_range=config.initializer_range,
+ do_return_all_layers=True)
+
+ self.sequence_output = self.all_encoder_layers[-1]
+ # The "pooler" converts the encoded sequence tensor of shape
+ # [batch_size, seq_length, hidden_size] to a tensor of shape
+ # [batch_size, hidden_size]. This is necessary for segment-level
+ # (or segment-pair-level) classification tasks where we need a fixed
+ # dimensional representation of the segment.
+ with tf.variable_scope("pooler"):
+ # We "pool" the model by simply taking the hidden state corresponding
+ # to the first token. We assume that this has been pre-trained
+ first_token_tensor = tf.squeeze(self.sequence_output[:, 0:1, :], axis=1)
+ self.pooled_output = tf.layers.dense(
+ first_token_tensor,
+ config.hidden_size,
+ activation=tf.tanh,
+ kernel_initializer=create_initializer(config.initializer_range))
+
+ def get_pooled_output(self):
+ return self.pooled_output
+
+ def get_sequence_output(self):
+ """Gets final hidden layer of encoder.
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, hidden_size] corresponding
+ to the final hidden layer of the transformer encoder.
+ """
+ return self.sequence_output
+
+ def get_all_encoder_layers(self):
+ return self.all_encoder_layers
+
+ def get_embedding_output(self):
+ """Gets output of the embedding lookup (i.e., input to the transformer).
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, hidden_size] corresponding
+ to the output of the embedding layer, after summing the word
+ embeddings with the positional embeddings and the token type embeddings,
+ then performing layer normalization. This is the input to the transformer.
+ """
+ return self.embedding_output
+
+ def get_embedding_table(self):
+ return self.embedding_table
+
+
+def gelu(x):
+ """Gaussian Error Linear Unit.
+
+ This is a smoother version of the RELU.
+ Original paper: https://arxiv.org/abs/1606.08415
+ Args:
+ x: float Tensor to perform activation.
+
+ Returns:
+ `x` with the GELU activation applied.
+ """
+ cdf = 0.5 * (1.0 + tf.tanh(
+ (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
+ return x * cdf
+
+
+def get_activation(activation_string):
+ """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.
+
+ Args:
+ activation_string: String name of the activation function.
+
+ Returns:
+ A Python function corresponding to the activation function. If
+ `activation_string` is None, empty, or "linear", this will return None.
+ If `activation_string` is not a string, it will return `activation_string`.
+
+ Raises:
+ ValueError: The `activation_string` does not correspond to a known
+ activation.
+ """
+
+ # We assume that anything that's not a string is already an activation
+ # function, so we just return it.
+ if not isinstance(activation_string, six.string_types):
+ return activation_string
+
+ if not activation_string:
+ return None
+
+ act = activation_string.lower()
+ if act == "linear":
+ return None
+ elif act == "relu":
+ return tf.nn.relu
+ elif act == "gelu":
+ return gelu
+ elif act == "tanh":
+ return tf.tanh
+ else:
+ raise ValueError("Unsupported activation: %s" % act)
+
+
+def get_assignment_map_from_checkpoint(tvars, init_checkpoint):
+ """Compute the union of the current variables and checkpoint variables."""
+ assignment_map = {}
+ initialized_variable_names = {}
+
+ name_to_variable = collections.OrderedDict()
+ for var in tvars:
+ name = var.name
+ m = re.match("^(.*):\\d+$", name)
+ if m is not None:
+ name = m.group(1)
+ name_to_variable[name] = var
+
+ init_vars = tf.train.list_variables(init_checkpoint)
+
+ assignment_map = collections.OrderedDict()
+ for x in init_vars:
+ (name, var) = (x[0], x[1])
+ if name not in name_to_variable:
+ continue
+ assignment_map[name] = name
+ initialized_variable_names[name] = 1
+ initialized_variable_names[name + ":0"] = 1
+
+ return (assignment_map, initialized_variable_names)
+
+
+def dropout(input_tensor, dropout_prob):
+ """Perform dropout.
+
+ Args:
+ input_tensor: float Tensor.
+ dropout_prob: Python float. The probability of dropping out a value (NOT of
+ *keeping* a dimension as in `tf.nn.dropout`).
+
+ Returns:
+ A version of `input_tensor` with dropout applied.
+ """
+ if dropout_prob is None or dropout_prob == 0.0:
+ return input_tensor
+
+ output = tf.nn.dropout(input_tensor, 1.0 - dropout_prob)
+ return output
+
+
+def layer_norm(input_tensor, name=None):
+ """Run layer normalization on the last dimension of the tensor."""
+ return tf.contrib.layers.layer_norm(
+ inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name)
+
+
+def layer_norm_and_dropout(input_tensor, dropout_prob, name=None):
+ """Runs layer normalization followed by dropout."""
+ output_tensor = layer_norm(input_tensor, name)
+ output_tensor = dropout(output_tensor, dropout_prob)
+ return output_tensor
+
+
+def create_initializer(initializer_range=0.02):
+ """Creates a `truncated_normal_initializer` with the given range."""
+ return tf.truncated_normal_initializer(stddev=initializer_range)
+
+
+def embedding_lookup(input_ids,
+ vocab_size,
+ embedding_size=128,
+ initializer_range=0.02,
+ word_embedding_name="word_embeddings",
+ use_one_hot_embeddings=False):
+ """Looks up words embeddings for id tensor.
+
+ Args:
+ input_ids: int32 Tensor of shape [batch_size, seq_length] containing word
+ ids.
+ vocab_size: int. Size of the embedding vocabulary.
+ embedding_size: int. Width of the word embeddings.
+ initializer_range: float. Embedding initialization range.
+ word_embedding_name: string. Name of the embedding table.
+ use_one_hot_embeddings: bool. If True, use one-hot method for word
+ embeddings. If False, use `tf.gather()`.
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, embedding_size].
+ """
+ # This function assumes that the input is of shape [batch_size, seq_length,
+ # num_inputs].
+ #
+ # If the input is a 2D tensor of shape [batch_size, seq_length], we
+ # reshape to [batch_size, seq_length, 1].
+ if input_ids.shape.ndims == 2:
+ input_ids = tf.expand_dims(input_ids, axis=[-1])
+
+ embedding_table = tf.get_variable(
+ name=word_embedding_name,
+ shape=[vocab_size, embedding_size],
+ initializer=create_initializer(initializer_range))
+
+ flat_input_ids = tf.reshape(input_ids, [-1])
+ if use_one_hot_embeddings:
+ one_hot_input_ids = tf.one_hot(flat_input_ids, depth=vocab_size)
+ output = tf.matmul(one_hot_input_ids, embedding_table)
+ else:
+ output = tf.gather(embedding_table, flat_input_ids)
+
+ input_shape = get_shape_list(input_ids)
+
+ output = tf.reshape(output,
+ input_shape[0:-1] + [input_shape[-1] * embedding_size])
+ return (output, embedding_table)
+
+
+def embedding_postprocessor(input_tensor,
+ use_token_type=False,
+ token_type_ids=None,
+ token_type_vocab_size=16,
+ token_type_embedding_name="token_type_embeddings",
+ use_position_embeddings=True,
+ position_embedding_name="position_embeddings",
+ initializer_range=0.02,
+ max_position_embeddings=512,
+ dropout_prob=0.1):
+ """Performs various post-processing on a word embedding tensor.
+
+ Args:
+ input_tensor: float Tensor of shape [batch_size, seq_length,
+ embedding_size].
+ use_token_type: bool. Whether to add embeddings for `token_type_ids`.
+ token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
+ Must be specified if `use_token_type` is True.
+ token_type_vocab_size: int. The vocabulary size of `token_type_ids`.
+ token_type_embedding_name: string. The name of the embedding table variable
+ for token type ids.
+ use_position_embeddings: bool. Whether to add position embeddings for the
+ position of each token in the sequence.
+ position_embedding_name: string. The name of the embedding table variable
+ for positional embeddings.
+ initializer_range: float. Range of the weight initialization.
+ max_position_embeddings: int. Maximum sequence length that might ever be
+ used with this model. This can be longer than the sequence length of
+ input_tensor, but cannot be shorter.
+ dropout_prob: float. Dropout probability applied to the final output tensor.
+
+ Returns:
+ float tensor with same shape as `input_tensor`.
+
+ Raises:
+ ValueError: One of the tensor shapes or input values is invalid.
+ """
+ input_shape = get_shape_list(input_tensor, expected_rank=3)
+ batch_size = input_shape[0]
+ seq_length = input_shape[1]
+ width = input_shape[2]
+
+ output = input_tensor
+
+ if use_token_type:
+ if token_type_ids is None:
+ raise ValueError("`token_type_ids` must be specified if"
+ "`use_token_type` is True.")
+ token_type_table = tf.get_variable(
+ name=token_type_embedding_name,
+ shape=[token_type_vocab_size, width],
+ initializer=create_initializer(initializer_range))
+ # This vocab will be small so we always do one-hot here, since it is always
+ # faster for a small vocabulary.
+ flat_token_type_ids = tf.reshape(token_type_ids, [-1])
+ one_hot_ids = tf.one_hot(flat_token_type_ids, depth=token_type_vocab_size)
+ token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
+ token_type_embeddings = tf.reshape(token_type_embeddings,
+ [batch_size, seq_length, width])
+ output += token_type_embeddings
+
+ if use_position_embeddings:
+ assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
+ with tf.control_dependencies([assert_op]):
+ full_position_embeddings = tf.get_variable(
+ name=position_embedding_name,
+ shape=[max_position_embeddings, width],
+ initializer=create_initializer(initializer_range))
+ # Since the position embedding table is a learned variable, we create it
+ # using a (long) sequence length `max_position_embeddings`. The actual
+ # sequence length might be shorter than this, for faster training of
+ # tasks that do not have long sequences.
+ #
+ # So `full_position_embeddings` is effectively an embedding table
+ # for position [0, 1, 2, ..., max_position_embeddings-1], and the current
+ # sequence has positions [0, 1, 2, ... seq_length-1], so we can just
+ # perform a slice.
+ position_embeddings = tf.slice(full_position_embeddings, [0, 0],
+ [seq_length, -1])
+ num_dims = len(output.shape.as_list())
+
+ # Only the last two dimensions are relevant (`seq_length` and `width`), so
+ # we broadcast among the first dimensions, which is typically just
+ # the batch size.
+ position_broadcast_shape = []
+ for _ in range(num_dims - 2):
+ position_broadcast_shape.append(1)
+ position_broadcast_shape.extend([seq_length, width])
+ position_embeddings = tf.reshape(position_embeddings,
+ position_broadcast_shape)
+ output += position_embeddings
+
+ output = layer_norm_and_dropout(output, dropout_prob)
+ return output
+
+
+def create_attention_mask_from_input_mask(from_tensor, to_mask):
+ """Create 3D attention mask from a 2D tensor mask.
+
+ Args:
+ from_tensor: 2D or 3D Tensor of shape [batch_size, from_seq_length, ...].
+ to_mask: int32 Tensor of shape [batch_size, to_seq_length].
+
+ Returns:
+ float Tensor of shape [batch_size, from_seq_length, to_seq_length].
+ """
+ from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
+ batch_size = from_shape[0]
+ from_seq_length = from_shape[1]
+
+ to_shape = get_shape_list(to_mask, expected_rank=2)
+ to_seq_length = to_shape[1]
+
+ to_mask = tf.cast(
+ tf.reshape(to_mask, [batch_size, 1, to_seq_length]), tf.float32)
+
+ # We don't assume that `from_tensor` is a mask (although it could be). We
+ # don't actually care if we attend *from* padding tokens (only *to* padding
+ # tokens), so we create a tensor of all ones.
+ #
+ # `broadcast_ones` = [batch_size, from_seq_length, 1]
+ broadcast_ones = tf.ones(
+ shape=[batch_size, from_seq_length, 1], dtype=tf.float32)
+
+ # Here we broadcast along two dimensions to create the mask.
+ mask = broadcast_ones * to_mask
+
+ return mask
+
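+# Editor's note (illustrative): for a single sequence of length 3 whose last
+# position is padding, to_mask == [[1, 1, 0]] is reshaped to
+# [batch_size, 1, to_seq_length] and broadcast against ones of shape
+# [batch_size, from_seq_length, 1], giving
+#
+#   [[[1., 1., 0.],
+#     [1., 1., 0.],
+#     [1., 1., 0.]]]
+#
+# i.e. every query position may attend to the two real tokens but not the pad.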
+
+def attention_layer(from_tensor,
+ to_tensor,
+ attention_mask=None,
+ num_attention_heads=1,
+ size_per_head=512,
+ query_act=None,
+ key_act=None,
+ value_act=None,
+ attention_probs_dropout_prob=0.0,
+ initializer_range=0.02,
+ do_return_2d_tensor=False,
+ batch_size=None,
+ from_seq_length=None,
+ to_seq_length=None):
+ """Performs multi-headed attention from `from_tensor` to `to_tensor`.
+
+ This is an implementation of multi-headed attention based on "Attention
+ Is All You Need". If `from_tensor` and `to_tensor` are the same, then
+ this is self-attention. Each timestep in `from_tensor` attends to the
+ corresponding sequence in `to_tensor`, and returns a fixed-width vector.
+
+ This function first projects `from_tensor` into a "query" tensor and
+ `to_tensor` into "key" and "value" tensors. These are (effectively) a list
+ of tensors of length `num_attention_heads`, where each tensor is of shape
+ [batch_size, seq_length, size_per_head].
+
+ Then, the query and key tensors are dot-producted and scaled. These are
+ softmaxed to obtain attention probabilities. The value tensors are then
+ interpolated by these probabilities, then concatenated back to a single
+ tensor and returned.
+
+ In practice, the multi-headed attention is done with transposes and
+ reshapes rather than actual separate tensors.
+
+ Args:
+ from_tensor: float Tensor of shape [batch_size, from_seq_length,
+ from_width].
+ to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width].
+ attention_mask: (optional) int32 Tensor of shape [batch_size,
+ from_seq_length, to_seq_length]. The values should be 1 or 0. The
+ attention scores will effectively be set to -infinity for any positions in
+ the mask that are 0, and will be unchanged for positions that are 1.
+ num_attention_heads: int. Number of attention heads.
+ size_per_head: int. Size of each attention head.
+ query_act: (optional) Activation function for the query transform.
+ key_act: (optional) Activation function for the key transform.
+ value_act: (optional) Activation function for the value transform.
+ attention_probs_dropout_prob: (optional) float. Dropout probability of the
+ attention probabilities.
+ initializer_range: float. Range of the weight initializer.
+ do_return_2d_tensor: bool. If True, the output will be of shape [batch_size
+ * from_seq_length, num_attention_heads * size_per_head]. If False, the
+ output will be of shape [batch_size, from_seq_length, num_attention_heads
+ * size_per_head].
+ batch_size: (Optional) int. If the input is 2D, this might be the batch size
+ of the 3D version of the `from_tensor` and `to_tensor`.
+ from_seq_length: (Optional) If the input is 2D, this might be the seq length
+ of the 3D version of the `from_tensor`.
+ to_seq_length: (Optional) If the input is 2D, this might be the seq length
+ of the 3D version of the `to_tensor`.
+
+ Returns:
+ float Tensor of shape [batch_size, from_seq_length,
+ num_attention_heads * size_per_head]. (If `do_return_2d_tensor` is
+ true, this will be of shape [batch_size * from_seq_length,
+ num_attention_heads * size_per_head]).
+
+ Raises:
+ ValueError: Any of the arguments or tensor shapes are invalid.
+ """
+
+ def transpose_for_scores(input_tensor, batch_size, num_attention_heads,
+ seq_length, width):
+ output_tensor = tf.reshape(
+ input_tensor, [batch_size, seq_length, num_attention_heads, width])
+
+ output_tensor = tf.transpose(output_tensor, [0, 2, 1, 3])
+ return output_tensor
+
+ from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
+ to_shape = get_shape_list(to_tensor, expected_rank=[2, 3])
+
+ if len(from_shape) != len(to_shape):
+ raise ValueError(
+ "The rank of `from_tensor` must match the rank of `to_tensor`.")
+
+ if len(from_shape) == 3:
+ batch_size = from_shape[0]
+ from_seq_length = from_shape[1]
+ to_seq_length = to_shape[1]
+ elif len(from_shape) == 2:
+ if (batch_size is None or from_seq_length is None or to_seq_length is None):
+ raise ValueError(
+ "When passing in rank 2 tensors to attention_layer, the values "
+ "for `batch_size`, `from_seq_length`, and `to_seq_length` "
+ "must all be specified.")
+
+ # Scalar dimensions referenced here:
+ # B = batch size (number of sequences)
+ # F = `from_tensor` sequence length
+ # T = `to_tensor` sequence length
+ # N = `num_attention_heads`
+ # H = `size_per_head`
+
+ from_tensor_2d = reshape_to_matrix(from_tensor)
+ to_tensor_2d = reshape_to_matrix(to_tensor)
+
+ # `query_layer` = [B*F, N*H]
+ query_layer = tf.layers.dense(
+ from_tensor_2d,
+ num_attention_heads * size_per_head,
+ activation=query_act,
+ name="query",
+ kernel_initializer=create_initializer(initializer_range))
+
+ # `key_layer` = [B*T, N*H]
+ key_layer = tf.layers.dense(
+ to_tensor_2d,
+ num_attention_heads * size_per_head,
+ activation=key_act,
+ name="key",
+ kernel_initializer=create_initializer(initializer_range))
+
+ # `value_layer` = [B*T, N*H]
+ value_layer = tf.layers.dense(
+ to_tensor_2d,
+ num_attention_heads * size_per_head,
+ activation=value_act,
+ name="value",
+ kernel_initializer=create_initializer(initializer_range))
+
+ # `query_layer` = [B, N, F, H]
+ query_layer = transpose_for_scores(query_layer, batch_size,
+ num_attention_heads, from_seq_length,
+ size_per_head)
+
+ # `key_layer` = [B, N, T, H]
+ key_layer = transpose_for_scores(key_layer, batch_size, num_attention_heads,
+ to_seq_length, size_per_head)
+
+ # Take the dot product between "query" and "key" to get the raw
+ # attention scores.
+ # `attention_scores` = [B, N, F, T]
+ attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
+ attention_scores = tf.multiply(attention_scores,
+ 1.0 / math.sqrt(float(size_per_head)))
+
+ if attention_mask is not None:
+ # `attention_mask` = [B, 1, F, T]
+ attention_mask = tf.expand_dims(attention_mask, axis=[1])
+
+ # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+ # masked positions, this operation will create a tensor which is 0.0 for
+ # positions we want to attend and -10000.0 for masked positions.
+ adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0
+
+ # Since we are adding it to the raw scores before the softmax, this is
+ # effectively the same as removing these entirely.
+ attention_scores += adder
+
+ # Normalize the attention scores to probabilities.
+ # `attention_probs` = [B, N, F, T]
+ attention_probs = tf.nn.softmax(attention_scores)
+
+ # This is actually dropping out entire tokens to attend to, which might
+ # seem a bit unusual, but is taken from the original Transformer paper.
+ attention_probs = dropout(attention_probs, attention_probs_dropout_prob)
+
+ # `value_layer` = [B, T, N, H]
+ value_layer = tf.reshape(
+ value_layer,
+ [batch_size, to_seq_length, num_attention_heads, size_per_head])
+
+ # `value_layer` = [B, N, T, H]
+ value_layer = tf.transpose(value_layer, [0, 2, 1, 3])
+
+ # `context_layer` = [B, N, F, H]
+ context_layer = tf.matmul(attention_probs, value_layer)
+
+ # `context_layer` = [B, F, N, H]
+ context_layer = tf.transpose(context_layer, [0, 2, 1, 3])
+
+ if do_return_2d_tensor:
+ # `context_layer` = [B*F, N*H]
+ context_layer = tf.reshape(
+ context_layer,
+ [batch_size * from_seq_length, num_attention_heads * size_per_head])
+ else:
+ # `context_layer` = [B, F, N*H]
+ context_layer = tf.reshape(
+ context_layer,
+ [batch_size, from_seq_length, num_attention_heads * size_per_head])
+
+ return context_layer
+
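+# Editor's note: the computation above is standard scaled dot-product attention
+# applied per head,
+#
+#   Attention(Q, K, V) = softmax(Q K^T / sqrt(size_per_head) + mask_adder) V
+#
+# with Q of shape [B, N, F, H], K and V of shape [B, N, T, H], and the heads
+# concatenated back to [B, F, N*H] (or [B*F, N*H] if do_return_2d_tensor).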
+
+def transformer_model(input_tensor,
+ attention_mask=None,
+ hidden_size=768,
+ num_hidden_layers=12,
+ num_attention_heads=12,
+ intermediate_size=3072,
+ intermediate_act_fn=gelu,
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ initializer_range=0.02,
+ do_return_all_layers=False):
+ """Multi-headed, multi-layer Transformer from "Attention is All You Need".
+
+ This is almost an exact implementation of the original Transformer encoder.
+
+ See the original paper:
+ https://arxiv.org/abs/1706.03762
+
+ Also see:
+ https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py
+
+ Args:
+ input_tensor: float Tensor of shape [batch_size, seq_length, hidden_size].
+ attention_mask: (optional) int32 Tensor of shape [batch_size, seq_length,
+ seq_length], with 1 for positions that can be attended to and 0 in
+ positions that should not be.
+ hidden_size: int. Hidden size of the Transformer.
+ num_hidden_layers: int. Number of layers (blocks) in the Transformer.
+ num_attention_heads: int. Number of attention heads in the Transformer.
+ intermediate_size: int. The size of the "intermediate" (a.k.a., feed
+ forward) layer.
+ intermediate_act_fn: function. The non-linear activation function to apply
+ to the output of the intermediate/feed-forward layer.
+ hidden_dropout_prob: float. Dropout probability for the hidden layers.
+ attention_probs_dropout_prob: float. Dropout probability of the attention
+ probabilities.
+ initializer_range: float. Range of the initializer (stddev of truncated
+ normal).
+ do_return_all_layers: Whether to also return all layers or just the final
+ layer.
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, hidden_size], the final
+ hidden layer of the Transformer.
+
+ Raises:
+ ValueError: A Tensor shape or parameter is invalid.
+ """
+ if hidden_size % num_attention_heads != 0:
+ raise ValueError(
+ "The hidden size (%d) is not a multiple of the number of attention "
+ "heads (%d)" % (hidden_size, num_attention_heads))
+
+ attention_head_size = int(hidden_size / num_attention_heads)
+ input_shape = get_shape_list(input_tensor, expected_rank=3)
+ batch_size = input_shape[0]
+ seq_length = input_shape[1]
+ input_width = input_shape[2]
+
+ # The Transformer adds residual connections around every layer, so the input
+ # width needs to be the same as the hidden size.
+ if input_width != hidden_size:
+ raise ValueError("The width of the input tensor (%d) != hidden size (%d)" %
+ (input_width, hidden_size))
+
+ # We keep the representation as a 2D tensor to avoid re-shaping it back and
+ # forth from a 3D tensor to a 2D tensor. Re-shapes are normally free on
+ # the GPU/CPU but may not be free on the TPU, so we want to minimize them to
+ # help the optimizer.
+ prev_output = reshape_to_matrix(input_tensor)
+
+ all_layer_outputs = []
+ for layer_idx in range(num_hidden_layers):
+ with tf.variable_scope("layer_%d" % layer_idx):
+ layer_input = prev_output
+
+ with tf.variable_scope("attention"):
+ attention_heads = []
+ with tf.variable_scope("self"):
+ attention_head = attention_layer(
+ from_tensor=layer_input,
+ to_tensor=layer_input,
+ attention_mask=attention_mask,
+ num_attention_heads=num_attention_heads,
+ size_per_head=attention_head_size,
+ attention_probs_dropout_prob=attention_probs_dropout_prob,
+ initializer_range=initializer_range,
+ do_return_2d_tensor=True,
+ batch_size=batch_size,
+ from_seq_length=seq_length,
+ to_seq_length=seq_length)
+ attention_heads.append(attention_head)
+
+ attention_output = None
+ if len(attention_heads) == 1:
+ attention_output = attention_heads[0]
+ else:
+ # In the case where we have other sequences, we just concatenate
+ # them to the self-attention head before the projection.
+ attention_output = tf.concat(attention_heads, axis=-1)
+
+ # Run a linear projection of `hidden_size` then add a residual
+ # with `layer_input`.
+ with tf.variable_scope("output"):
+ attention_output = tf.layers.dense(
+ attention_output,
+ hidden_size,
+ kernel_initializer=create_initializer(initializer_range))
+ attention_output = dropout(attention_output, hidden_dropout_prob)
+ attention_output = layer_norm(attention_output + layer_input)
+
+ # The activation is only applied to the "intermediate" hidden layer.
+ with tf.variable_scope("intermediate"):
+ intermediate_output = tf.layers.dense(
+ attention_output,
+ intermediate_size,
+ activation=intermediate_act_fn,
+ kernel_initializer=create_initializer(initializer_range))
+
+ # Down-project back to `hidden_size` then add the residual.
+ with tf.variable_scope("output"):
+ layer_output = tf.layers.dense(
+ intermediate_output,
+ hidden_size,
+ kernel_initializer=create_initializer(initializer_range))
+ layer_output = dropout(layer_output, hidden_dropout_prob)
+ layer_output = layer_norm(layer_output + attention_output)
+ prev_output = layer_output
+ all_layer_outputs.append(layer_output)
+
+ if do_return_all_layers:
+ final_outputs = []
+ for layer_output in all_layer_outputs:
+ final_output = reshape_from_matrix(layer_output, input_shape)
+ final_outputs.append(final_output)
+ return final_outputs
+ else:
+ final_output = reshape_from_matrix(prev_output, input_shape)
+ return final_output
+
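+# Editor's note: each layer above is a standard post-LayerNorm encoder block,
+#
+#   x = layer_norm(x + dropout(dense(self_attention(x))))
+#   x = layer_norm(x + dropout(dense(intermediate_act_fn(dense(x)))))
+#
+# repeated num_hidden_layers times on a [batch_size*seq_length, hidden_size]
+# matrix and reshaped back to 3D at the end.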
+
+def get_shape_list(tensor, expected_rank=None, name=None):
+ """Returns a list of the shape of tensor, preferring static dimensions.
+
+ Args:
+ tensor: A tf.Tensor object to find the shape of.
+ expected_rank: (optional) int. The expected rank of `tensor`. If this is
+ specified and the `tensor` has a different rank, an exception will be
+ thrown.
+ name: Optional name of the tensor for the error message.
+
+ Returns:
+ A list of dimensions of the shape of tensor. All static dimensions will
+ be returned as python integers, and dynamic dimensions will be returned
+ as tf.Tensor scalars.
+ """
+ if name is None:
+ name = tensor.name
+
+ if expected_rank is not None:
+ assert_rank(tensor, expected_rank, name)
+
+ shape = tensor.shape.as_list()
+
+ non_static_indexes = []
+ for (index, dim) in enumerate(shape):
+ if dim is None:
+ non_static_indexes.append(index)
+
+ if not non_static_indexes:
+ return shape
+
+ dyn_shape = tf.shape(tensor)
+ for index in non_static_indexes:
+ shape[index] = dyn_shape[index]
+ return shape
+
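+# Editor's note (illustrative): for a TF1 placeholder of shape [None, 128],
+# get_shape_list returns [batch_size_tensor, 128] -- the unknown batch
+# dimension as a scalar tf.Tensor (from tf.shape) and the static length as a
+# plain Python int.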
+
+def reshape_to_matrix(input_tensor):
+ """Reshapes a >= rank 2 tensor to a rank 2 tensor (i.e., a matrix)."""
+ ndims = input_tensor.shape.ndims
+ if ndims < 2:
+ raise ValueError("Input tensor must have at least rank 2. Shape = %s" %
+ (input_tensor.shape))
+ if ndims == 2:
+ return input_tensor
+
+ width = input_tensor.shape[-1]
+ output_tensor = tf.reshape(input_tensor, [-1, width])
+ return output_tensor
+
+
+def reshape_from_matrix(output_tensor, orig_shape_list):
+ """Reshapes a rank 2 tensor back to its original rank >= 2 tensor."""
+ if len(orig_shape_list) == 2:
+ return output_tensor
+
+ output_shape = get_shape_list(output_tensor)
+
+ orig_dims = orig_shape_list[0:-1]
+ width = output_shape[-1]
+
+ return tf.reshape(output_tensor, orig_dims + [width])
+
+
+def assert_rank(tensor, expected_rank, name=None):
+ """Raises an exception if the tensor rank is not of the expected rank.
+
+ Args:
+ tensor: A tf.Tensor to check the rank of.
+ expected_rank: Python integer or list of integers, expected rank.
+ name: Optional name of the tensor for the error message.
+
+ Raises:
+ ValueError: If the expected shape doesn't match the actual shape.
+ """
+ if name is None:
+ name = tensor.name
+
+ expected_rank_dict = {}
+ if isinstance(expected_rank, six.integer_types):
+ expected_rank_dict[expected_rank] = True
+ else:
+ for x in expected_rank:
+ expected_rank_dict[x] = True
+
+ actual_rank = tensor.shape.ndims
+ if actual_rank not in expected_rank_dict:
+ scope_name = tf.get_variable_scope().name
+ raise ValueError(
+ "For the tensor `%s` in scope `%s`, the actual rank "
+ "`%d` (shape = %s) is not equal to the expected rank `%s`" %
+ (name, scope_name, actual_rank, str(tensor.shape), str(expected_rank)))
diff --git a/modeling_test.py b/modeling_test.py
new file mode 100644
index 0000000..817ad2d
--- /dev/null
+++ b/modeling_test.py
@@ -0,0 +1,277 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import json
+import random
+import re
+
+import modeling
+import six
+import tensorflow as tf
+
+
+class BertModelTest(tf.test.TestCase):
+
+ class BertModelTester(object):
+
+ def __init__(self,
+ parent,
+ batch_size=13,
+ seq_length=7,
+ is_training=True,
+ use_input_mask=True,
+ use_token_type_ids=True,
+ vocab_size=99,
+ hidden_size=32,
+ num_hidden_layers=5,
+ num_attention_heads=4,
+ intermediate_size=37,
+ hidden_act="gelu",
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ max_position_embeddings=512,
+ type_vocab_size=16,
+ initializer_range=0.02,
+ scope=None):
+ self.parent = parent
+ self.batch_size = batch_size
+ self.seq_length = seq_length
+ self.is_training = is_training
+ self.use_input_mask = use_input_mask
+ self.use_token_type_ids = use_token_type_ids
+ self.vocab_size = vocab_size
+ self.hidden_size = hidden_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.intermediate_size = intermediate_size
+ self.hidden_act = hidden_act
+ self.hidden_dropout_prob = hidden_dropout_prob
+ self.attention_probs_dropout_prob = attention_probs_dropout_prob
+ self.max_position_embeddings = max_position_embeddings
+ self.type_vocab_size = type_vocab_size
+ self.initializer_range = initializer_range
+ self.scope = scope
+
+ def create_model(self):
+ input_ids = BertModelTest.ids_tensor([self.batch_size, self.seq_length],
+ self.vocab_size)
+
+ input_mask = None
+ if self.use_input_mask:
+ input_mask = BertModelTest.ids_tensor(
+ [self.batch_size, self.seq_length], vocab_size=2)
+
+ token_type_ids = None
+ if self.use_token_type_ids:
+ token_type_ids = BertModelTest.ids_tensor(
+ [self.batch_size, self.seq_length], self.type_vocab_size)
+
+ config = modeling.BertConfig(
+ vocab_size=self.vocab_size,
+ hidden_size=self.hidden_size,
+ num_hidden_layers=self.num_hidden_layers,
+ num_attention_heads=self.num_attention_heads,
+ intermediate_size=self.intermediate_size,
+ hidden_act=self.hidden_act,
+ hidden_dropout_prob=self.hidden_dropout_prob,
+ attention_probs_dropout_prob=self.attention_probs_dropout_prob,
+ max_position_embeddings=self.max_position_embeddings,
+ type_vocab_size=self.type_vocab_size,
+ initializer_range=self.initializer_range)
+
+ model = modeling.BertModel(
+ config=config,
+ is_training=self.is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=token_type_ids,
+ scope=self.scope)
+
+ outputs = {
+ "embedding_output": model.get_embedding_output(),
+ "sequence_output": model.get_sequence_output(),
+ "pooled_output": model.get_pooled_output(),
+ "all_encoder_layers": model.get_all_encoder_layers(),
+ }
+ return outputs
+
+ def check_output(self, result):
+ self.parent.assertAllEqual(
+ result["embedding_output"].shape,
+ [self.batch_size, self.seq_length, self.hidden_size])
+
+ self.parent.assertAllEqual(
+ result["sequence_output"].shape,
+ [self.batch_size, self.seq_length, self.hidden_size])
+
+ self.parent.assertAllEqual(result["pooled_output"].shape,
+ [self.batch_size, self.hidden_size])
+
+ def test_default(self):
+ self.run_tester(BertModelTest.BertModelTester(self))
+
+ def test_config_to_json_string(self):
+ config = modeling.BertConfig(vocab_size=99, hidden_size=37)
+ obj = json.loads(config.to_json_string())
+ self.assertEqual(obj["vocab_size"], 99)
+ self.assertEqual(obj["hidden_size"], 37)
+
+ def run_tester(self, tester):
+ with self.test_session() as sess:
+ ops = tester.create_model()
+ init_op = tf.group(tf.global_variables_initializer(),
+ tf.local_variables_initializer())
+ sess.run(init_op)
+ output_result = sess.run(ops)
+ tester.check_output(output_result)
+
+ self.assert_all_tensors_reachable(sess, [init_op, ops])
+
+ @classmethod
+ def ids_tensor(cls, shape, vocab_size, rng=None, name=None):
+ """Creates a random int32 tensor of the shape within the vocab size."""
+ if rng is None:
+ rng = random.Random()
+
+ total_dims = 1
+ for dim in shape:
+ total_dims *= dim
+
+ values = []
+ for _ in range(total_dims):
+ values.append(rng.randint(0, vocab_size - 1))
+
+ return tf.constant(value=values, dtype=tf.int32, shape=shape, name=name)
+
+ def assert_all_tensors_reachable(self, sess, outputs):
+ """Checks that all the tensors in the graph are reachable from outputs."""
+ graph = sess.graph
+
+ ignore_strings = [
+ "^.*/assert_less_equal/.*$",
+ "^.*/dilation_rate$",
+ "^.*/Tensordot/concat$",
+ "^.*/Tensordot/concat/axis$",
+ "^testing/.*$",
+ ]
+
+ ignore_regexes = [re.compile(x) for x in ignore_strings]
+
+ unreachable = self.get_unreachable_ops(graph, outputs)
+ filtered_unreachable = []
+ for x in unreachable:
+ do_ignore = False
+ for r in ignore_regexes:
+ m = r.match(x.name)
+ if m is not None:
+ do_ignore = True
+ if do_ignore:
+ continue
+ filtered_unreachable.append(x)
+ unreachable = filtered_unreachable
+
+ self.assertEqual(
+ len(unreachable), 0, "The following ops are unreachable: %s" %
+ (" ".join([x.name for x in unreachable])))
+
+ @classmethod
+ def get_unreachable_ops(cls, graph, outputs):
+ """Finds all of the tensors in graph that are unreachable from outputs."""
+ outputs = cls.flatten_recursive(outputs)
+ output_to_op = collections.defaultdict(list)
+ op_to_all = collections.defaultdict(list)
+ assign_out_to_in = collections.defaultdict(list)
+
+ for op in graph.get_operations():
+ for x in op.inputs:
+ op_to_all[op.name].append(x.name)
+ for y in op.outputs:
+ output_to_op[y.name].append(op.name)
+ op_to_all[op.name].append(y.name)
+ if str(op.type) == "Assign":
+ for y in op.outputs:
+ for x in op.inputs:
+ assign_out_to_in[y.name].append(x.name)
+
+ assign_groups = collections.defaultdict(list)
+ for out_name in assign_out_to_in.keys():
+ name_group = assign_out_to_in[out_name]
+ for n1 in name_group:
+ assign_groups[n1].append(out_name)
+ for n2 in name_group:
+ if n1 != n2:
+ assign_groups[n1].append(n2)
+
+ seen_tensors = {}
+ stack = [x.name for x in outputs]
+ while stack:
+ name = stack.pop()
+ if name in seen_tensors:
+ continue
+ seen_tensors[name] = True
+
+ if name in output_to_op:
+ for op_name in output_to_op[name]:
+ if op_name in op_to_all:
+ for input_name in op_to_all[op_name]:
+ if input_name not in stack:
+ stack.append(input_name)
+
+ expanded_names = []
+ if name in assign_groups:
+ for assign_name in assign_groups[name]:
+ expanded_names.append(assign_name)
+
+ for expanded_name in expanded_names:
+ if expanded_name not in stack:
+ stack.append(expanded_name)
+
+ unreachable_ops = []
+ for op in graph.get_operations():
+ is_unreachable = False
+ all_names = [x.name for x in op.inputs] + [x.name for x in op.outputs]
+ for name in all_names:
+ if name not in seen_tensors:
+ is_unreachable = True
+ if is_unreachable:
+ unreachable_ops.append(op)
+ return unreachable_ops
+
+ @classmethod
+ def flatten_recursive(cls, item):
+ """Flattens (potentially nested) a tuple/dictionary/list to a list."""
+ output = []
+ if isinstance(item, list):
+ output.extend(item)
+ elif isinstance(item, tuple):
+ output.extend(list(item))
+ elif isinstance(item, dict):
+ for (_, v) in six.iteritems(item):
+ output.append(v)
+ else:
+ return [item]
+
+ flat_output = []
+ for x in output:
+ flat_output.extend(cls.flatten_recursive(x))
+ return flat_output
+
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/optimization.py b/optimization.py
new file mode 100644
index 0000000..d33dabd
--- /dev/null
+++ b/optimization.py
@@ -0,0 +1,174 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions and classes related to optimization (weight updates)."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import re
+import tensorflow as tf
+
+
+def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu):
+ """Creates an optimizer training op."""
+ global_step = tf.train.get_or_create_global_step()
+
+ learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)
+
+ # Implements linear decay of the learning rate.
+ learning_rate = tf.train.polynomial_decay(
+ learning_rate,
+ global_step,
+ num_train_steps,
+ end_learning_rate=0.0,
+ power=1.0,
+ cycle=False)
+
+ # Implements linear warmup. I.e., if global_step < num_warmup_steps, the
+ # learning rate will be `global_step/num_warmup_steps * init_lr`.
+ if num_warmup_steps:
+ global_steps_int = tf.cast(global_step, tf.int32)
+ warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)
+
+ global_steps_float = tf.cast(global_steps_int, tf.float32)
+ warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)
+
+ warmup_percent_done = global_steps_float / warmup_steps_float
+ warmup_learning_rate = init_lr * warmup_percent_done
+
+ is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
+ learning_rate = (
+ (1.0 - is_warmup) * learning_rate + is_warmup * warmup_learning_rate)
+
+ # It is recommended that you use this optimizer for fine tuning, since this
+ # is how the model was trained (note that the Adam m/v variables are NOT
+ # loaded from init_checkpoint.)
+ optimizer = AdamWeightDecayOptimizer(
+ learning_rate=learning_rate,
+ weight_decay_rate=0.01,
+ beta_1=0.9,
+ beta_2=0.999,
+ epsilon=1e-6,
+ exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
+
+ if use_tpu:
+ optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
+
+ tvars = tf.trainable_variables()
+ grads = tf.gradients(loss, tvars)
+
+ # This is how the model was pre-trained.
+ (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
+
+ train_op = optimizer.apply_gradients(
+ zip(grads, tvars), global_step=global_step)
+
+ # Normally the global step update is done inside of `apply_gradients`.
+ # However, `AdamWeightDecayOptimizer` doesn't do this. But if you use
+ # a different optimizer, you should probably take this line out.
+ new_global_step = global_step + 1
+ train_op = tf.group(train_op, [global_step.assign(new_global_step)])
+ return train_op
+
+
+class AdamWeightDecayOptimizer(tf.train.Optimizer):
+ """A basic Adam optimizer that includes "correct" L2 weight decay."""
+
+ def __init__(self,
+ learning_rate,
+ weight_decay_rate=0.0,
+ beta_1=0.9,
+ beta_2=0.999,
+ epsilon=1e-6,
+ exclude_from_weight_decay=None,
+ name="AdamWeightDecayOptimizer"):
+ """Constructs a AdamWeightDecayOptimizer."""
+ super(AdamWeightDecayOptimizer, self).__init__(False, name)
+
+ self.learning_rate = learning_rate
+ self.weight_decay_rate = weight_decay_rate
+ self.beta_1 = beta_1
+ self.beta_2 = beta_2
+ self.epsilon = epsilon
+ self.exclude_from_weight_decay = exclude_from_weight_decay
+
+ def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+ """See base class."""
+ assignments = []
+ for (grad, param) in grads_and_vars:
+ if grad is None or param is None:
+ continue
+
+ param_name = self._get_variable_name(param.name)
+
+ m = tf.get_variable(
+ name=param_name + "/adam_m",
+ shape=param.shape.as_list(),
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.zeros_initializer())
+ v = tf.get_variable(
+ name=param_name + "/adam_v",
+ shape=param.shape.as_list(),
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.zeros_initializer())
+
+ # Standard Adam update.
+ next_m = (
+ tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
+ next_v = (
+ tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
+ tf.square(grad)))
+
+ update = next_m / (tf.sqrt(next_v) + self.epsilon)
+
+ # Just adding the square of the weights to the loss function is *not*
+ # the correct way of using L2 regularization/weight decay with Adam,
+ # since that will interact with the m and v parameters in strange ways.
+ #
+ # Instead we want to decay the weights in a manner that doesn't interact
+ # with the m/v parameters. This is equivalent to adding the square
+ # of the weights to the loss with plain (non-momentum) SGD.
+ if self._do_use_weight_decay(param_name):
+ update += self.weight_decay_rate * param
+
+ update_with_lr = self.learning_rate * update
+
+ next_param = param - update_with_lr
+
+ assignments.extend(
+ [param.assign(next_param),
+ m.assign(next_m),
+ v.assign(next_v)])
+ return tf.group(*assignments, name=name)
+
+ def _do_use_weight_decay(self, param_name):
+ """Whether to use L2 weight decay for `param_name`."""
+ if not self.weight_decay_rate:
+ return False
+ if self.exclude_from_weight_decay:
+ for r in self.exclude_from_weight_decay:
+ if re.search(r, param_name) is not None:
+ return False
+ return True
+
+ def _get_variable_name(self, param_name):
+ """Get the variable name from the tensor name."""
+ m = re.match("^(.*):\\d+$", param_name)
+ if m is not None:
+ param_name = m.group(1)
+ return param_name
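For orientation, a minimal sketch of wiring `create_optimizer` into a training loop (the toy loss, step counts, and learning rate below are placeholders for illustration, not values taken from this repository):

    import tensorflow as tf
    import optimization

    # Toy least-squares loss standing in for the real classification loss.
    w = tf.get_variable("w", shape=[3], initializer=tf.zeros_initializer())
    loss = tf.reduce_mean(tf.square(tf.constant([0.4, 0.2, -0.5]) - w))

    # Linear warmup over the first 10% of steps, then linear decay to zero,
    # with decoupled weight decay skipped for LayerNorm and bias parameters.
    train_op = optimization.create_optimizer(
        loss, init_lr=2e-5, num_train_steps=1000, num_warmup_steps=100,
        use_tpu=False)

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      for _ in range(5):
        sess.run(train_op)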
diff --git a/optimization_test.py b/optimization_test.py
new file mode 100644
index 0000000..4f2dcf1
--- /dev/null
+++ b/optimization_test.py
@@ -0,0 +1,48 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import optimization
+import tensorflow as tf
+
+
+class OptimizationTest(tf.test.TestCase):
+
+ def test_adam(self):
+ with self.test_session() as sess:
+ w = tf.get_variable(
+ "w",
+ shape=[3],
+ initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
+ x = tf.constant([0.4, 0.2, -0.5])
+ loss = tf.reduce_mean(tf.square(x - w))
+ tvars = tf.trainable_variables()
+ grads = tf.gradients(loss, tvars)
+ global_step = tf.train.get_or_create_global_step()
+ optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
+ train_op = optimizer.apply_gradients(zip(grads, tvars), global_step)
+ init_op = tf.group(tf.global_variables_initializer(),
+ tf.local_variables_initializer())
+ sess.run(init_op)
+ for _ in range(100):
+ sess.run(train_op)
+ w_np = sess.run(w)
+ self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2)
+
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/predicting_movie_reviews_with_bert_on_tf_hub.ipynb b/predicting_movie_reviews_with_bert_on_tf_hub.ipynb
new file mode 100644
index 0000000..466857f
--- /dev/null
+++ b/predicting_movie_reviews_with_bert_on_tf_hub.ipynb
@@ -0,0 +1,1231 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Predicting Movie Reviews with BERT on TF Hub.ipynb",
+ "version": "0.3.2",
+ "provenance": [],
+ "collapsed_sections": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "metadata": {
+ "id": "j0a4mTk9o1Qg",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Copyright 2019 Google Inc.\n",
+ "\n",
+ "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "\n",
+ "# http://www.apache.org/licenses/LICENSE-2.0\n",
+ "\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "dCpvgG0vwXAZ",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#Predicting Movie Review Sentiment with BERT on TF Hub"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "xiYrZKaHwV81",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.\n",
+ "\n",
+ "Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing Tensorflow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMO and GloVE. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.\n",
+ "\n",
+ "Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in Tensorflow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "hsZvic2YxnTz",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "import pandas as pd\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_hub as hub\n",
+ "from datetime import datetime"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "cp5wfXDx5SPH",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "In addition to the standard libraries we imported above, we'll need to install BERT's python package."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "jviywGyWyKsA",
+ "colab_type": "code",
+ "outputId": "166f3005-d219-404f-b201-2a0b75480360",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 51
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "!pip install bert-tensorflow"
+ ],
+ "execution_count": 38,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: bert-tensorflow in /usr/local/lib/python3.6/dist-packages (1.0.1)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from bert-tensorflow) (1.11.0)\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "hhbGEfwgdEtw",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "import bert\n",
+ "from bert import run_classifier\n",
+ "from bert import optimization\n",
+ "from bert import tokenization"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "KVB3eOcjxxm1",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Below, we'll set an output directory location to store our model output and checkpoints. This can be a local directory, in which case you'd set OUTPUT_DIR to the name of the directory you'd like to create. If you're running this code in Google's hosted Colab, the directory won't persist after the Colab session ends.\n",
+ "\n",
+ "Alternatively, if you're a GCP user, you can store output in a GCP bucket. To do that, set a directory name in OUTPUT_DIR and the name of the GCP bucket in the BUCKET field.\n",
+ "\n",
+ "Set DO_DELETE to rewrite the OUTPUT_DIR if it exists. Otherwise, Tensorflow will load existing model checkpoints from that directory (if they exist)."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "US_EAnICvP7f",
+ "colab_type": "code",
+ "outputId": "7780a032-31d4-4794-e6aa-664a5d2ae7dd",
+ "cellView": "form",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "# Set the output directory for saving model file\n",
+ "# Optionally, set a GCP bucket location\n",
+ "\n",
+ "OUTPUT_DIR = 'OUTPUT_DIR_NAME'#@param {type:\"string\"}\n",
+ "#@markdown Whether or not to clear/delete the directory and create a new one\n",
+ "DO_DELETE = False #@param {type:\"boolean\"}\n",
+ "#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.\n",
+ "USE_BUCKET = True #@param {type:\"boolean\"}\n",
+ "BUCKET = 'BUCKET_NAME' #@param {type:\"string\"}\n",
+ "\n",
+ "if USE_BUCKET:\n",
+ " OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)\n",
+ " from google.colab import auth\n",
+ " auth.authenticate_user()\n",
+ "\n",
+ "if DO_DELETE:\n",
+ " try:\n",
+ " tf.gfile.DeleteRecursively(OUTPUT_DIR)\n",
+ " except:\n",
+ " # Doesn't matter if the directory didn't exist\n",
+ " pass\n",
+ "tf.gfile.MakeDirs(OUTPUT_DIR)\n",
+ "print('***** Model output directory: {} *****'.format(OUTPUT_DIR))\n"
+ ],
+ "execution_count": 40,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "***** Model output directory: gs://bert-tfhub/aclImdb_v1 *****\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "pmFYvkylMwXn",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#Data"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "MC_w8SRqN0fr",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub)."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "fom_ff20gyy6",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "from tensorflow import keras\n",
+ "import os\n",
+ "import re\n",
+ "\n",
+ "# Load all files from a directory in a DataFrame.\n",
+ "def load_directory_data(directory):\n",
+ " data = {}\n",
+ " data[\"sentence\"] = []\n",
+ " data[\"sentiment\"] = []\n",
+ " for file_path in os.listdir(directory):\n",
+ " with tf.gfile.GFile(os.path.join(directory, file_path), \"r\") as f:\n",
+ " data[\"sentence\"].append(f.read())\n",
+ " data[\"sentiment\"].append(re.match(\"\\d+_(\\d+)\\.txt\", file_path).group(1))\n",
+ " return pd.DataFrame.from_dict(data)\n",
+ "\n",
+ "# Merge positive and negative examples, add a polarity column and shuffle.\n",
+ "def load_dataset(directory):\n",
+ " pos_df = load_directory_data(os.path.join(directory, \"pos\"))\n",
+ " neg_df = load_directory_data(os.path.join(directory, \"neg\"))\n",
+ " pos_df[\"polarity\"] = 1\n",
+ " neg_df[\"polarity\"] = 0\n",
+ " return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)\n",
+ "\n",
+ "# Download and process the dataset files.\n",
+ "def download_and_load_datasets(force_download=False):\n",
+ " dataset = tf.keras.utils.get_file(\n",
+ " fname=\"aclImdb.tar.gz\", \n",
+ " origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\", \n",
+ " extract=True)\n",
+ " \n",
+ " train_df = load_dataset(os.path.join(os.path.dirname(dataset), \n",
+ " \"aclImdb\", \"train\"))\n",
+ " test_df = load_dataset(os.path.join(os.path.dirname(dataset), \n",
+ " \"aclImdb\", \"test\"))\n",
+ " \n",
+ " return train_df, test_df\n"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "2abfwdn-g135",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "train, test = download_and_load_datasets()"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "XA8WHJgzhIZf",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "To keep training fast, we'll take a sample of 5000 train and test examples, respectively."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "lw_F488eixTV",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "train = train.sample(5000)\n",
+ "test = test.sample(5000)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "prRQM8pDi8xI",
+ "colab_type": "code",
+ "outputId": "34445cb8-2be0-4379-fdbc-7794091f6049",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "train.columns"
+ ],
+ "execution_count": 44,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['sentence', 'sentiment', 'polarity'], dtype='object')"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 44
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "sfRnHSz3iSXz",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respecitvely)"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "IuMOGwFui4it",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "DATA_COLUMN = 'sentence'\n",
+ "LABEL_COLUMN = 'polarity'\n",
+ "# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'\n",
+ "label_list = [0, 1]"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "V399W0rqNJ-Z",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#Data Preprocessing\n",
+ "We'll need to transform our data into a format BERT understands. This involves two steps. First, we create `InputExample`'s using the constructor provided in the BERT library.\n",
+ "\n",
+ "- `text_a` is the text we want to classify, which in this case, is the `Request` field in our Dataframe. \n",
+ "- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.\n",
+ "- `label` is the label for our example, i.e. True, False"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "p9gEt5SmM6i6",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Use the InputExample class from BERT's run_classifier code to create examples from the data\n",
+ "train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example\n",
+ " text_a = x[DATA_COLUMN], \n",
+ " text_b = None, \n",
+ " label = x[LABEL_COLUMN]), axis = 1)\n",
+ "\n",
+ "test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, \n",
+ " text_a = x[DATA_COLUMN], \n",
+ " text_b = None, \n",
+ " label = x[LABEL_COLUMN]), axis = 1)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "SCZWZtKxObjh",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):\n",
+ "\n",
+ "\n",
+ "1. Lowercase our text (if we're using a BERT lowercase model)\n",
+ "2. Tokenize it (i.e. \"sally says hi\" -> [\"sally\", \"says\", \"hi\"])\n",
+ "3. Break words into WordPieces (i.e. \"calling\" -> [\"call\", \"##ing\"])\n",
+ "4. Map our words to indexes using a vocab file that BERT provides\n",
+ "5. Add special \"CLS\" and \"SEP\" tokens (see the [readme](https://github.com/google-research/bert))\n",
+ "6. Append \"index\" and \"segment\" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))\n",
+ "\n",
+ "Happily, we don't have to worry about most of these details.\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "qMWiDtpyQSoU",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "IhJSe0QHNG7U",
+ "colab_type": "code",
+ "outputId": "20b28cc7-3cb3-4ce6-bfff-a7847ce3bbaa",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "# This is a path to an uncased (all lowercase) version of BERT\n",
+ "BERT_MODEL_HUB = \"https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1\"\n",
+ "\n",
+ "def create_tokenizer_from_hub_module():\n",
+ " \"\"\"Get the vocab file and casing info from the Hub module.\"\"\"\n",
+ " with tf.Graph().as_default():\n",
+ " bert_module = hub.Module(BERT_MODEL_HUB)\n",
+ " tokenization_info = bert_module(signature=\"tokenization_info\", as_dict=True)\n",
+ " with tf.Session() as sess:\n",
+ " vocab_file, do_lower_case = sess.run([tokenization_info[\"vocab_file\"],\n",
+ " tokenization_info[\"do_lower_case\"]])\n",
+ " \n",
+ " return bert.tokenization.FullTokenizer(\n",
+ " vocab_file=vocab_file, do_lower_case=do_lower_case)\n",
+ "\n",
+ "tokenizer = create_tokenizer_from_hub_module()"
+ ],
+ "execution_count": 47,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "z4oFkhpZBDKm",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Great--we just learned that the BERT model we're using expects lowercase data (that's what stored in tokenization_info[\"do_lower_case\"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "dsBo6RCtQmwx",
+ "colab_type": "code",
+ "outputId": "9af8c917-90ec-4fe9-897b-79dc89ca88e1",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 221
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "tokenizer.tokenize(\"This here's an example of using the BERT tokenizer\")"
+ ],
+ "execution_count": 48,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "['this',\n",
+ " 'here',\n",
+ " \"'\",\n",
+ " 's',\n",
+ " 'an',\n",
+ " 'example',\n",
+ " 'of',\n",
+ " 'using',\n",
+ " 'the',\n",
+ " 'bert',\n",
+ " 'token',\n",
+ " '##izer']"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 48
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "0OEzfFIt6GIc",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "LL5W8gEGRTAf",
+ "colab_type": "code",
+ "outputId": "65001dda-155b-48fc-b5fc-1e4cabc8dfbf",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1261
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "# We'll set sequences to be at most 128 tokens long.\n",
+ "MAX_SEQ_LENGTH = 128\n",
+ "# Convert our train and test features to InputFeatures that BERT understands.\n",
+ "train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)\n",
+ "test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)"
+ ],
+ "execution_count": 49,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "INFO:tensorflow:Writing example 0 of 5000\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: None\n",
+ "INFO:tensorflow:tokens: [CLS] i ' m watching this on the sci - fi channel right now . it ' s so horrible i can ' t stop watching it ! i ' m a video ##grapher and this movie makes me sad . i feel bad for anyone associated with this movie . some of the camera work is good . most is very questionable . there are a few decent actors in the flick . too bad they ' re surrounded by what must have been the director ' s relatives . that ' s the only way they could have been qualified to be in a movie ! music was a little better than the acting . if you get around to watching this i hope it [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 1045 1005 1049 3666 2023 2006 1996 16596 1011 10882 3149 2157 2085 1012 2009 1005 1055 2061 9202 1045 2064 1005 1056 2644 3666 2009 999 1045 1005 1049 1037 2678 18657 1998 2023 3185 3084 2033 6517 1012 1045 2514 2919 2005 3087 3378 2007 2023 3185 1012 2070 1997 1996 4950 2147 2003 2204 1012 2087 2003 2200 21068 1012 2045 2024 1037 2261 11519 5889 1999 1996 17312 1012 2205 2919 2027 1005 2128 5129 2011 2054 2442 2031 2042 1996 2472 1005 1055 9064 1012 2008 1005 1055 1996 2069 2126 2027 2071 2031 2042 4591 2000 2022 1999 1037 3185 999 2189 2001 1037 2210 2488 2084 1996 3772 1012 2065 2017 2131 2105 2000 3666 2023 1045 3246 2009 102\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 0 (id = 0)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: None\n",
+ "INFO:tensorflow:tokens: [CLS] i have been a fan of pushing dai ##sies since the very beginning . it is wonderful ##ly thought up , and bryan fuller has the most remarkable ideas for this show . < br / > < br / > it is unbelievable on how much tv has been needing a creative , original show like pushing dai ##sies . it is a huge relief to see a show , that is unlike the rest , where as , if you compared it to some of the newer shows , such as scrub ##s and house , you would see the similarities , and it does get ted ##ious at moments to see shows so close in identity . < br / > < br [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 1045 2031 2042 1037 5470 1997 6183 18765 14625 2144 1996 2200 2927 1012 2009 2003 6919 2135 2245 2039 1010 1998 8527 12548 2038 1996 2087 9487 4784 2005 2023 2265 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 2003 23653 2006 2129 2172 2694 2038 2042 11303 1037 5541 1010 2434 2265 2066 6183 18765 14625 1012 2009 2003 1037 4121 4335 2000 2156 1037 2265 1010 2008 2003 4406 1996 2717 1010 2073 2004 1010 2065 2017 4102 2009 2000 2070 1997 1996 10947 3065 1010 2107 2004 18157 2015 1998 2160 1010 2017 2052 2156 1996 12319 1010 1998 2009 2515 2131 6945 6313 2012 5312 2000 2156 3065 2061 2485 1999 4767 1012 1026 7987 1013 1028 1026 7987 102\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 1 (id = 1)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: None\n",
+ "INFO:tensorflow:tokens: [CLS] this movie starts out promising ##ly , with an early scene in which frank morgan advises against gary cooper ' s marriage to his daughter , anita louise . frank morgan , playing an una ##bas ##hed gold - digger , loudly complain ##s to cooper about his perceived pen ##ury at the hands of his family - including his daughter , anita louise . i am a fan of all 3 actors . frank morgan is ( to my mind ) a hollywood treasure , cooper a legend , and louise a very lovely , versatile and under - appreciated actress seldom seen in the leading role . i also have nothing against teresa wright , and while not blessed with great range , she [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 2023 3185 4627 2041 10015 2135 1010 2007 2019 2220 3496 1999 2029 3581 5253 25453 2114 5639 6201 1005 1055 3510 2000 2010 2684 1010 12918 8227 1012 3581 5253 1010 2652 2019 14477 22083 9072 2751 1011 28661 1010 9928 17612 2015 2000 6201 2055 2010 8690 7279 13098 2012 1996 2398 1997 2010 2155 1011 2164 2010 2684 1010 12918 8227 1012 1045 2572 1037 5470 1997 2035 1017 5889 1012 3581 5253 2003 1006 2000 2026 2568 1007 1037 5365 8813 1010 6201 1037 5722 1010 1998 8227 1037 2200 8403 1010 22979 1998 2104 1011 12315 3883 15839 2464 1999 1996 2877 2535 1012 1045 2036 2031 2498 2114 12409 6119 1010 1998 2096 2025 10190 2007 2307 2846 1010 2016 102\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 0 (id = 0)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: None\n",
+ "INFO:tensorflow:tokens: [CLS] i was over ##taken by the emotion . un ##for ##get ##table rendering of a wartime story which is unknown to most people . the performances were fault ##less and outstanding . [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 1045 2001 2058 25310 2011 1996 7603 1012 4895 29278 18150 10880 14259 1997 1037 12498 2466 2029 2003 4242 2000 2087 2111 1012 1996 4616 2020 6346 3238 1998 5151 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 1 (id = 1)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: None\n",
+ "INFO:tensorflow:tokens: [CLS] soldier blue is a movie with pre ##tension ##s : pre ##tension ##s to be some sort of profound statement on man ' s inhuman ##ity to man , on the white man ' s exploitation of and brutality towards indigenous peoples ; a biting , un ##fl ##in ##ching and sar ##don ##ic commentary on the horrors of vietnam . well , sorry , but it fails mis ##era ##bly to be any of those things . what soldier blue actually is is per ##nic ##ious , tri ##te , badly made , dish ##ones ##t rubbish . < br / > < br / > another reviewer here hit the nail on the head in saying that it appears to be a hybrid of [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 5268 2630 2003 1037 3185 2007 3653 29048 2015 1024 3653 29048 2015 2000 2022 2070 4066 1997 13769 4861 2006 2158 1005 1055 29582 3012 2000 2158 1010 2006 1996 2317 2158 1005 1055 14427 1997 1998 24083 2875 6284 7243 1025 1037 12344 1010 4895 10258 2378 8450 1998 18906 5280 2594 8570 2006 1996 22812 1997 5148 1012 2092 1010 3374 1010 2021 2009 11896 28616 6906 6321 2000 2022 2151 1997 2216 2477 1012 2054 5268 2630 2941 2003 2003 2566 8713 6313 1010 13012 2618 1010 6649 2081 1010 9841 21821 2102 29132 1012 1026 7987 1013 1028 1026 7987 1013 1028 2178 12027 2182 2718 1996 13774 2006 1996 2132 1999 3038 2008 2009 3544 2000 2022 1037 8893 1997 102\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 0 (id = 0)\n",
+ "INFO:tensorflow:Writing example 0 of 5000\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: None\n",
+ "INFO:tensorflow:tokens: [CLS] i just watched this today on tv . it was on abc ' s sunday afternoon movie . < br / > < br / > this wasn ' t a very good movie , but for a low budget independent film like this , it was okay . there is some suspense in it , but there are so many bad qualities that really bring the movie down . the script is pretty lame , and the plot elements aren ' t very realistic , such as the way a 911 operator would laugh and hang up when someone is reporting a murder . i don ' t know what the writer was thinking when they came up with that idea , but it isn [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 1045 2074 3427 2023 2651 2006 2694 1012 2009 2001 2006 5925 1005 1055 4465 5027 3185 1012 1026 7987 1013 1028 1026 7987 1013 1028 2023 2347 1005 1056 1037 2200 2204 3185 1010 2021 2005 1037 2659 5166 2981 2143 2066 2023 1010 2009 2001 3100 1012 2045 2003 2070 23873 1999 2009 1010 2021 2045 2024 2061 2116 2919 11647 2008 2428 3288 1996 3185 2091 1012 1996 5896 2003 3492 20342 1010 1998 1996 5436 3787 4995 1005 1056 2200 12689 1010 2107 2004 1996 2126 1037 19989 6872 2052 4756 1998 6865 2039 2043 2619 2003 7316 1037 4028 1012 1045 2123 1005 1056 2113 2054 1996 3213 2001 3241 2043 2027 2234 2039 2007 2008 2801 1010 2021 2009 3475 102\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 0 (id = 0)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: None\n",
+ "INFO:tensorflow:tokens: [CLS] from hardly alien sounding lasers , to an elementary school style shuttle crash , \" night ##be ##ast \" is better classified as a far ##cic ##al mix of fake blood and bare chest . the almost pornographic style of the film seems to be a failed attempt to recover from a lack of co ##hesive or effective story . the acting however is not nearly as beast ##ly , many of the young , aspiring , actors ad ##mir ##ably showcase a hidden talent . particularly don lei ##fer ##t and jamie ze ##mare ##l , who shed a well needed sha ##rd of light on this otherwise terrible film . night ##be ##ast would have never shown up on set had he known the [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 2013 6684 7344 9391 23965 1010 2000 2019 4732 2082 2806 10382 5823 1010 1000 2305 4783 14083 1000 2003 2488 6219 2004 1037 2521 19053 2389 4666 1997 8275 2668 1998 6436 3108 1012 1996 2471 26932 2806 1997 1996 2143 3849 2000 2022 1037 3478 3535 2000 8980 2013 1037 3768 1997 2522 21579 2030 4621 2466 1012 1996 3772 2174 2003 2025 3053 2004 6841 2135 1010 2116 1997 1996 2402 1010 22344 1010 5889 4748 14503 8231 13398 1037 5023 5848 1012 3391 2123 26947 7512 2102 1998 6175 27838 24376 2140 1010 2040 8328 1037 2092 2734 21146 4103 1997 2422 2006 2023 4728 6659 2143 1012 2305 4783 14083 2052 2031 2196 3491 2039 2006 2275 2018 2002 2124 1996 102\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 0 (id = 0)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: None\n",
+ "INFO:tensorflow:tokens: [CLS] here we have the in ##imi ##table charlie chaplin for ##sa ##king his slap ##stick past to tackle the serious subject of anti - semi ##tism , and into ##ler ##ance in general . he portrays two characters - the sweet , innocent jewish barber - a war veteran , and the ravi ##ng and ruthless dictator , aden ##oid h ##yn ##kel . the jewish ghetto in this country is not safe for long , due to the w ##him ##s of h ##yn ##kel and his armed thugs , who routinely rough up its residents , or leave them alone , dependent upon his mood that day or week . the barber is among them , but is befriended by his former commanding officer [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 2182 2057 2031 1996 1999 27605 10880 4918 23331 2005 3736 6834 2010 14308 21354 2627 2000 11147 1996 3809 3395 1997 3424 1011 4100 17456 1010 1998 2046 3917 6651 1999 2236 1012 2002 17509 2048 3494 1011 1996 4086 1010 7036 3644 13362 1011 1037 2162 8003 1010 1998 1996 16806 3070 1998 18101 21237 1010 16298 9314 1044 6038 11705 1012 1996 3644 17276 1999 2023 2406 2003 2025 3647 2005 2146 1010 2349 2000 1996 1059 14341 2015 1997 1044 6038 11705 1998 2010 4273 24106 1010 2040 19974 5931 2039 2049 3901 1010 2030 2681 2068 2894 1010 7790 2588 2010 6888 2008 2154 2030 2733 1012 1996 13362 2003 2426 2068 1010 2021 2003 23386 2011 2010 2280 7991 2961 102\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 1 (id = 1)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: None\n",
+ "INFO:tensorflow:tokens: [CLS] i really hated this movie and it ' s the first movie written by stephen king that i didn ' t finish . i was truly disappointed , it was the worst crap i ' ve ever seen . what were you thinking making three hours out of it ? it may have a quite good story , but actors ? no . suspense ? no . romance ? no . horror ? no . it didn ' t have anything . < br / > < br / > it ' s got this strange , crazy science man with einstein - hair , the classic thing . not real at all . and a man keep getting younger all the time . it seems [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 1045 2428 6283 2023 3185 1998 2009 1005 1055 1996 2034 3185 2517 2011 4459 2332 2008 1045 2134 1005 1056 3926 1012 1045 2001 5621 9364 1010 2009 2001 1996 5409 10231 1045 1005 2310 2412 2464 1012 2054 2020 2017 3241 2437 2093 2847 2041 1997 2009 1029 2009 2089 2031 1037 3243 2204 2466 1010 2021 5889 1029 2053 1012 23873 1029 2053 1012 7472 1029 2053 1012 5469 1029 2053 1012 2009 2134 1005 1056 2031 2505 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 1005 1055 2288 2023 4326 1010 4689 2671 2158 2007 15313 1011 2606 1010 1996 4438 2518 1012 2025 2613 2012 2035 1012 1998 1037 2158 2562 2893 3920 2035 1996 2051 1012 2009 3849 102\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 0 (id = 0)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: None\n",
+ "INFO:tensorflow:tokens: [CLS] story chinese tall story tells the story of righteous monk trip ##ita ##ka , who , along with his guardians monkey , sandy and pigs ##y make their journey west on a quest to recover ancient sutra ##s , finally , they reach the final leg of their journey in sha ##che city but all is not as it seems when the city is attacked by evil tree demons . monkey tries his best to battle them but is overwhelmed , knowing his master is in grave danger , he uses his trust ##y golden staff to thrust trip ##ita ##ka to safety . < br / > < br / > the monk ends up being knocked out when he land and when he wakes [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 2466 2822 4206 2466 4136 1996 2466 1997 19556 8284 4440 6590 2912 1010 2040 1010 2247 2007 2010 14240 10608 1010 7525 1998 14695 2100 2191 2037 4990 2225 2006 1037 8795 2000 8980 3418 26567 2015 1010 2633 1010 2027 3362 1996 2345 4190 1997 2037 4990 1999 21146 5403 2103 2021 2035 2003 2025 2004 2009 3849 2043 1996 2103 2003 4457 2011 4763 3392 7942 1012 10608 5363 2010 2190 2000 2645 2068 2021 2003 13394 1010 4209 2010 3040 2003 1999 6542 5473 1010 2002 3594 2010 3404 2100 3585 3095 2000 7400 4440 6590 2912 2000 3808 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 8284 4515 2039 2108 6573 2041 2043 2002 2455 1998 2043 2002 17507 102\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 1 (id = 1)\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "ccp5trMwRtmr",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#Creating a model\n",
+ "\n",
+ "Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning)."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "6o2a5ZIvRcJq",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,\n",
+ " num_labels):\n",
+ " \"\"\"Creates a classification model.\"\"\"\n",
+ "\n",
+ " bert_module = hub.Module(\n",
+ " BERT_MODEL_HUB,\n",
+ " trainable=True)\n",
+ " bert_inputs = dict(\n",
+ " input_ids=input_ids,\n",
+ " input_mask=input_mask,\n",
+ " segment_ids=segment_ids)\n",
+ " bert_outputs = bert_module(\n",
+ " inputs=bert_inputs,\n",
+ " signature=\"tokens\",\n",
+ " as_dict=True)\n",
+ "\n",
+ " # Use \"pooled_output\" for classification tasks on an entire sentence.\n",
+ " # Use \"sequence_outputs\" for token-level output.\n",
+ " output_layer = bert_outputs[\"pooled_output\"]\n",
+ "\n",
+ " hidden_size = output_layer.shape[-1].value\n",
+ "\n",
+ " # Create our own layer to tune for politeness data.\n",
+ " output_weights = tf.get_variable(\n",
+ " \"output_weights\", [num_labels, hidden_size],\n",
+ " initializer=tf.truncated_normal_initializer(stddev=0.02))\n",
+ "\n",
+ " output_bias = tf.get_variable(\n",
+ " \"output_bias\", [num_labels], initializer=tf.zeros_initializer())\n",
+ "\n",
+ " with tf.variable_scope(\"loss\"):\n",
+ "\n",
+ " # Dropout helps prevent overfitting\n",
+ " output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)\n",
+ "\n",
+ " logits = tf.matmul(output_layer, output_weights, transpose_b=True)\n",
+ " logits = tf.nn.bias_add(logits, output_bias)\n",
+ " log_probs = tf.nn.log_softmax(logits, axis=-1)\n",
+ "\n",
+ " # Convert labels into one-hot encoding\n",
+ " one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)\n",
+ "\n",
+ " predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))\n",
+ " # If we're predicting, we want predicted labels and the probabiltiies.\n",
+ " if is_predicting:\n",
+ " return (predicted_labels, log_probs)\n",
+ "\n",
+ " # If we're train/eval, compute loss between predicted and actual label\n",
+ " per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)\n",
+ " loss = tf.reduce_mean(per_example_loss)\n",
+ " return (loss, predicted_labels, log_probs)\n"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "qpE0ZIDOCQzE",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "FnH-AnOQ9KKW",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# model_fn_builder actually creates our model function\n",
+ "# using the passed parameters for num_labels, learning_rate, etc.\n",
+ "def model_fn_builder(num_labels, learning_rate, num_train_steps,\n",
+ " num_warmup_steps):\n",
+ " \"\"\"Returns `model_fn` closure for TPUEstimator.\"\"\"\n",
+ " def model_fn(features, labels, mode, params): # pylint: disable=unused-argument\n",
+ " \"\"\"The `model_fn` for TPUEstimator.\"\"\"\n",
+ "\n",
+ " input_ids = features[\"input_ids\"]\n",
+ " input_mask = features[\"input_mask\"]\n",
+ " segment_ids = features[\"segment_ids\"]\n",
+ " label_ids = features[\"label_ids\"]\n",
+ "\n",
+ " is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)\n",
+ " \n",
+ " # TRAIN and EVAL\n",
+ " if not is_predicting:\n",
+ "\n",
+ " (loss, predicted_labels, log_probs) = create_model(\n",
+ " is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)\n",
+ "\n",
+ " train_op = bert.optimization.create_optimizer(\n",
+ " loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)\n",
+ "\n",
+ " # Calculate evaluation metrics. \n",
+ " def metric_fn(label_ids, predicted_labels):\n",
+ " accuracy = tf.metrics.accuracy(label_ids, predicted_labels)\n",
+ " f1_score = tf.contrib.metrics.f1_score(\n",
+ " label_ids,\n",
+ " predicted_labels)\n",
+ " auc = tf.metrics.auc(\n",
+ " label_ids,\n",
+ " predicted_labels)\n",
+ " recall = tf.metrics.recall(\n",
+ " label_ids,\n",
+ " predicted_labels)\n",
+ " precision = tf.metrics.precision(\n",
+ " label_ids,\n",
+ " predicted_labels) \n",
+ " true_pos = tf.metrics.true_positives(\n",
+ " label_ids,\n",
+ " predicted_labels)\n",
+ " true_neg = tf.metrics.true_negatives(\n",
+ " label_ids,\n",
+ " predicted_labels) \n",
+ " false_pos = tf.metrics.false_positives(\n",
+ " label_ids,\n",
+ " predicted_labels) \n",
+ " false_neg = tf.metrics.false_negatives(\n",
+ " label_ids,\n",
+ " predicted_labels)\n",
+ " return {\n",
+ " \"eval_accuracy\": accuracy,\n",
+ " \"f1_score\": f1_score,\n",
+ " \"auc\": auc,\n",
+ " \"precision\": precision,\n",
+ " \"recall\": recall,\n",
+ " \"true_positives\": true_pos,\n",
+ " \"true_negatives\": true_neg,\n",
+ " \"false_positives\": false_pos,\n",
+ " \"false_negatives\": false_neg\n",
+ " }\n",
+ "\n",
+ " eval_metrics = metric_fn(label_ids, predicted_labels)\n",
+ "\n",
+ " if mode == tf.estimator.ModeKeys.TRAIN:\n",
+ " return tf.estimator.EstimatorSpec(mode=mode,\n",
+ " loss=loss,\n",
+ " train_op=train_op)\n",
+ " else:\n",
+ " return tf.estimator.EstimatorSpec(mode=mode,\n",
+ " loss=loss,\n",
+ " eval_metric_ops=eval_metrics)\n",
+ " else:\n",
+ " (predicted_labels, log_probs) = create_model(\n",
+ " is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)\n",
+ "\n",
+ " predictions = {\n",
+ " 'probabilities': log_probs,\n",
+ " 'labels': predicted_labels\n",
+ " }\n",
+ " return tf.estimator.EstimatorSpec(mode, predictions=predictions)\n",
+ "\n",
+ " # Return the actual model function in the closure\n",
+ " return model_fn\n"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "OjwJ4bTeWXD8",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Compute train and warmup steps from batch size\n",
+ "# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)\n",
+ "BATCH_SIZE = 32\n",
+ "LEARNING_RATE = 2e-5\n",
+ "NUM_TRAIN_EPOCHS = 3.0\n",
+ "# Warmup is a period of time where hte learning rate \n",
+ "# is small and gradually increases--usually helps training.\n",
+ "WARMUP_PROPORTION = 0.1\n",
+ "# Model configs\n",
+ "SAVE_CHECKPOINTS_STEPS = 500\n",
+ "SAVE_SUMMARY_STEPS = 100"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "emHf9GhfWBZ_",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Compute # train and warmup steps from batch size\n",
+ "num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)\n",
+ "num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "oEJldMr3WYZa",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Specify outpit directory and number of checkpoint steps to save\n",
+ "run_config = tf.estimator.RunConfig(\n",
+ " model_dir=OUTPUT_DIR,\n",
+ " save_summary_steps=SAVE_SUMMARY_STEPS,\n",
+ " save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "q_WebpS1X97v",
+ "colab_type": "code",
+ "outputId": "1648932a-7391-49d3-8af7-52d514e226e8",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 156
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "model_fn = model_fn_builder(\n",
+ " num_labels=len(label_list),\n",
+ " learning_rate=LEARNING_RATE,\n",
+ " num_train_steps=num_train_steps,\n",
+ " num_warmup_steps=num_warmup_steps)\n",
+ "\n",
+ "estimator = tf.estimator.Estimator(\n",
+ " model_fn=model_fn,\n",
+ " config=run_config,\n",
+ " params={\"batch_size\": BATCH_SIZE})\n"
+ ],
+ "execution_count": 55,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "INFO:tensorflow:Using config: {'_model_dir': 'gs://bert-tfhub/aclImdb_v1', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true\n",
+ "graph_options {\n",
+ " rewrite_options {\n",
+ " meta_optimizer_iterations: ONE\n",
+ " }\n",
+ "}\n",
+ ", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': , '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "NOO3RfG1DYLo",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Next we create an input builder function that takes our training feature set (`train_features`) and produces a generator. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators)."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "1Pv2bAlOX_-K",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Create an input function for training. drop_remainder = True for using TPUs.\n",
+ "train_input_fn = bert.run_classifier.input_fn_builder(\n",
+ " features=train_features,\n",
+ " seq_length=MAX_SEQ_LENGTH,\n",
+ " is_training=True,\n",
+ " drop_remainder=False)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "t6Nukby2EB6-",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "nucD4gluYJmK",
+ "colab_type": "code",
+ "outputId": "5d728e72-4631-42bf-c48d-3f51d4b968ce",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 68
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "print(f'Beginning Training!')\n",
+ "current_time = datetime.now()\n",
+ "estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)\n",
+ "print(\"Training took time \", datetime.now() - current_time)"
+ ],
+ "execution_count": 57,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Beginning Training!\n",
+ "INFO:tensorflow:Skipping training since max_steps has already saved.\n",
+ "Training took time 0:00:00.759709\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "CmbLTVniARy3",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Now let's use our test data to see how well our model did:"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "JIhejfpyJ8Bx",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "test_input_fn = run_classifier.input_fn_builder(\n",
+ " features=test_features,\n",
+ " seq_length=MAX_SEQ_LENGTH,\n",
+ " is_training=False,\n",
+ " drop_remainder=False)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "PPVEXhNjYXC-",
+ "colab_type": "code",
+ "outputId": "dd5482cd-c558-465f-c854-ec11a0175316",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 445
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "estimator.evaluate(input_fn=test_input_fn, steps=None)"
+ ],
+ "execution_count": 59,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "INFO:tensorflow:Calling model_fn.\n",
+ "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py:110: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n",
+ " \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. \"\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "INFO:tensorflow:Done calling model_fn.\n",
+ "INFO:tensorflow:Starting evaluation at 2019-02-12T21:04:20Z\n",
+ "INFO:tensorflow:Graph was finalized.\n",
+ "INFO:tensorflow:Restoring parameters from gs://bert-tfhub/aclImdb_v1/model.ckpt-468\n",
+ "INFO:tensorflow:Running local_init_op.\n",
+ "INFO:tensorflow:Done running local_init_op.\n",
+ "INFO:tensorflow:Finished evaluation at 2019-02-12-21:06:05\n",
+ "INFO:tensorflow:Saving dict for global step 468: auc = 0.86659324, eval_accuracy = 0.8664, f1_score = 0.8659711, false_negatives = 375.0, false_positives = 293.0, global_step = 468, loss = 0.51870537, precision = 0.880457, recall = 0.8519542, true_negatives = 2174.0, true_positives = 2158.0\n",
+ "INFO:tensorflow:Saving 'checkpoint_path' summary for global step 468: gs://bert-tfhub/aclImdb_v1/model.ckpt-468\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{'auc': 0.86659324,\n",
+ " 'eval_accuracy': 0.8664,\n",
+ " 'f1_score': 0.8659711,\n",
+ " 'false_negatives': 375.0,\n",
+ " 'false_positives': 293.0,\n",
+ " 'global_step': 468,\n",
+ " 'loss': 0.51870537,\n",
+ " 'precision': 0.880457,\n",
+ " 'recall': 0.8519542,\n",
+ " 'true_negatives': 2174.0,\n",
+ " 'true_positives': 2158.0}"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 59
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "ueKsULteiz1B",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Now let's write code to make predictions on new sentences:"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "OsrbTD2EJTVl",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "def getPrediction(in_sentences):\n",
+ " labels = [\"Negative\", \"Positive\"]\n",
+ " input_examples = [run_classifier.InputExample(guid=\"\", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, \"\" is just a dummy label\n",
+ " input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)\n",
+ " predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)\n",
+ " predictions = estimator.predict(predict_input_fn)\n",
+ " return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "-thbodgih_VJ",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "pred_sentences = [\n",
+ " \"That movie was absolutely awful\",\n",
+ " \"The acting was a bit lacking\",\n",
+ " \"The film was creative and surprising\",\n",
+ " \"Absolutely fantastic!\"\n",
+ "]"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "QrZmvZySKQTm",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 649
+ },
+ "outputId": "3891fafb-a460-4eb8-fa6c-335a5bbc10e5"
+ },
+ "cell_type": "code",
+ "source": [
+ "predictions = getPrediction(pred_sentences)"
+ ],
+ "execution_count": 72,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "INFO:tensorflow:Writing example 0 of 4\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: \n",
+ "INFO:tensorflow:tokens: [CLS] that movie was absolutely awful [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 2008 3185 2001 7078 9643 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 0 (id = 0)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: \n",
+ "INFO:tensorflow:tokens: [CLS] the acting was a bit lacking [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 1996 3772 2001 1037 2978 11158 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 0 (id = 0)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: \n",
+ "INFO:tensorflow:tokens: [CLS] the film was creative and surprising [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 1996 2143 2001 5541 1998 11341 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 0 (id = 0)\n",
+ "INFO:tensorflow:*** Example ***\n",
+ "INFO:tensorflow:guid: \n",
+ "INFO:tensorflow:tokens: [CLS] absolutely fantastic ! [SEP]\n",
+ "INFO:tensorflow:input_ids: 101 7078 10392 999 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:input_mask: 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "INFO:tensorflow:label: 0 (id = 0)\n",
+ "INFO:tensorflow:Calling model_fn.\n",
+ "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n",
+ "INFO:tensorflow:Done calling model_fn.\n",
+ "INFO:tensorflow:Graph was finalized.\n",
+ "INFO:tensorflow:Restoring parameters from gs://bert-tfhub/aclImdb_v1/model.ckpt-468\n",
+ "INFO:tensorflow:Running local_init_op.\n",
+ "INFO:tensorflow:Done running local_init_op.\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "MXkRiEBUqN3n",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Voila! We have a sentiment classifier!"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "ERkTE8-7oQLZ",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 221
+ },
+ "outputId": "26c33224-dc2c-4b3d-f7b4-ac3ef0a58b27"
+ },
+ "cell_type": "code",
+ "source": [
+ "predictions"
+ ],
+ "execution_count": 73,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[('That movie was absolutely awful',\n",
+ " array([-4.9142293e-03, -5.3180690e+00], dtype=float32),\n",
+ " 'Negative'),\n",
+ " ('The acting was a bit lacking',\n",
+ " array([-0.03325794, -3.4200459 ], dtype=float32),\n",
+ " 'Negative'),\n",
+ " ('The film was creative and surprising',\n",
+ " array([-5.3589125e+00, -4.7171740e-03], dtype=float32),\n",
+ " 'Positive'),\n",
+ " ('Absolutely fantastic!',\n",
+ " array([-5.0434084 , -0.00647258], dtype=float32),\n",
+ " 'Positive')]"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 73
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/run_classifier.py b/run_classifier.py
new file mode 100644
index 0000000..1ef4f06
--- /dev/null
+++ b/run_classifier.py
@@ -0,0 +1,1056 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT finetuning runner."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import csv
+import os
+import modeling
+import optimization
+import tokenization
+import tensorflow as tf
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+## Required parameters
+flags.DEFINE_string(
+ "data_dir", None,
+ "The input data dir. Should contain the .tsv files (or other data files) "
+ "for the task.")
+
+flags.DEFINE_string(
+ "bert_config_file", None,
+ "The config json file corresponding to the pre-trained BERT model. "
+ "This specifies the model architecture.")
+
+flags.DEFINE_string("task_name", None, "The name of the task to train.")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_string(
+ "output_dir", None,
+ "The output directory where the model checkpoints will be written.")
+
+## Other parameters
+
+flags.DEFINE_string(
+ "init_checkpoint", None,
+ "Initial checkpoint (usually from a pre-trained BERT model).")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_integer(
+ "max_seq_length", 128,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded.")
+
+flags.DEFINE_bool("do_train", False, "Whether to run training.")
+
+flags.DEFINE_bool("do_eval", False, "Whether to run eval on the dev set.")
+
+flags.DEFINE_bool(
+ "do_predict", False,
+ "Whether to run the model in inference mode on the test set.")
+
+flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.")
+
+flags.DEFINE_integer("eval_batch_size", 8, "Total batch size for eval.")
+
+flags.DEFINE_integer("predict_batch_size", 8, "Total batch size for predict.")
+
+flags.DEFINE_float("learning_rate", 5e-5, "The initial learning rate for Adam.")
+
+flags.DEFINE_float("num_train_epochs", 3.0,
+ "Total number of training epochs to perform.")
+
+flags.DEFINE_float(
+ "warmup_proportion", 0.1,
+ "Proportion of training to perform linear learning rate warmup for. "
+ "E.g., 0.1 = 10% of training.")
+
+flags.DEFINE_integer("save_checkpoints_steps", 1000,
+ "How often to save the model checkpoint.")
+
+flags.DEFINE_integer("iterations_per_loop", 1000,
+ "How many steps to make in each estimator call.")
+
+flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
+
+tf.flags.DEFINE_string(
+ "tpu_name", None,
+ "The Cloud TPU to use for training. This should be either the name "
+ "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 "
+ "url.")
+
+tf.flags.DEFINE_string(
+ "tpu_zone", None,
+ "[Optional] GCE zone where the Cloud TPU is located in. If not "
+ "specified, we will attempt to automatically detect the GCE project from "
+ "metadata.")
+
+tf.flags.DEFINE_string(
+ "gcp_project", None,
+ "[Optional] Project name for the Cloud TPU-enabled project. If not "
+ "specified, we will attempt to automatically detect the GCE project from "
+ "metadata.")
+
+tf.flags.DEFINE_string("master", None, "[Optional] TensorFlow master URL.")
+
+flags.DEFINE_integer(
+ "num_tpu_cores", 8,
+ "Only used if `use_tpu` is True. Total number of TPU cores to use.")
+
+
+class InputExample(object):
+ """A single training/test example for simple sequence classification."""
+
+ def __init__(self, guid, text_a, text_b=None, label=None):
+ """Constructs a InputExample.
+
+ Args:
+ guid: Unique id for the example.
+ text_a: string. The untokenized text of the first sequence. For single
+ sequence tasks, only this sequence must be specified.
+ text_b: (Optional) string. The untokenized text of the second sequence.
+ Only must be specified for sequence pair tasks.
+ label: (Optional) string. The label of the example. This should be
+ specified for train and dev examples, but not for test examples.
+ """
+ self.guid = guid
+ self.text_a = text_a
+ self.text_b = text_b
+ self.label = label
+
+
+class PaddingInputExample(object):
+ """Fake example so the num input examples is a multiple of the batch size.
+
+ When running eval/predict on the TPU, we need to pad the number of examples
+ to be a multiple of the batch size, because the TPU requires a fixed batch
+ size. The alternative is to drop the last batch, which is bad because it means
+ the entire output data won't be generated.
+
+ We use this class instead of `None` because treating `None` as padding
+ batches could cause silent errors.
+ """
+
+
+class InputFeatures(object):
+ """A single set of features of data."""
+
+ def __init__(self,
+ input_ids,
+ input_mask,
+ segment_ids,
+ label_id,
+ is_real_example=True):
+ self.input_ids = input_ids
+ self.input_mask = input_mask
+ self.segment_ids = segment_ids
+ self.label_id = label_id
+ self.is_real_example = is_real_example
+
+
+class DataProcessor(object):
+ """Base class for data converters for sequence classification data sets."""
+
+ def get_train_examples(self, data_dir):
+ """Gets a collection of `InputExample`s for the train set."""
+ raise NotImplementedError()
+
+ def get_dev_examples(self, data_dir):
+ """Gets a collection of `InputExample`s for the dev set."""
+ raise NotImplementedError()
+
+ def get_test_examples(self, data_dir):
+ """Gets a collection of `InputExample`s for prediction."""
+ raise NotImplementedError()
+
+ def get_labels(self):
+ """Gets the list of labels for this data set."""
+ raise NotImplementedError()
+
+ @classmethod
+ def _read_tsv(cls, input_file, quotechar=None):
+ """Reads a tab separated value file."""
+ with tf.gfile.Open(input_file, "r") as f:
+ reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
+ lines = []
+ for line in reader:
+ lines.append(line)
+ return lines
+
+
+class XnliProcessor(DataProcessor):
+ """Processor for the XNLI data set."""
+
+ def __init__(self):
+ self.language = "zh"
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ lines = self._read_tsv(
+ os.path.join(data_dir, "multinli",
+ "multinli.train.%s.tsv" % self.language))
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "train-%d" % (i)
+ text_a = tokenization.convert_to_unicode(line[0])
+ text_b = tokenization.convert_to_unicode(line[1])
+ label = tokenization.convert_to_unicode(line[2])
+ if label == tokenization.convert_to_unicode("contradictory"):
+ label = tokenization.convert_to_unicode("contradiction")
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ lines = self._read_tsv(os.path.join(data_dir, "xnli.dev.tsv"))
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "dev-%d" % (i)
+ language = tokenization.convert_to_unicode(line[0])
+ if language != tokenization.convert_to_unicode(self.language):
+ continue
+ text_a = tokenization.convert_to_unicode(line[6])
+ text_b = tokenization.convert_to_unicode(line[7])
+ label = tokenization.convert_to_unicode(line[1])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+ def get_labels(self):
+ """See base class."""
+ return ["contradiction", "entailment", "neutral"]
+
+
+class MnliProcessor(DataProcessor):
+ """Processor for the MultiNLI data set (GLUE version)."""
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")),
+ "dev_matched")
+
+ def get_test_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "test_matched.tsv")), "test")
+
+ def get_labels(self):
+ """See base class."""
+ return ["contradiction", "entailment", "neutral"]
+
+ def _create_examples(self, lines, set_type):
+ """Creates examples for the training and dev sets."""
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "%s-%s" % (set_type, tokenization.convert_to_unicode(line[0]))
+ text_a = tokenization.convert_to_unicode(line[8])
+ text_b = tokenization.convert_to_unicode(line[9])
+ if set_type == "test":
+ label = "contradiction"
+ else:
+ label = tokenization.convert_to_unicode(line[-1])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+
+class MrpcProcessor(DataProcessor):
+ """Processor for the MRPC data set (GLUE version)."""
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
+
+ def get_test_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
+
+ def get_labels(self):
+ """See base class."""
+ return ["0", "1"]
+
+ def _create_examples(self, lines, set_type):
+ """Creates examples for the training and dev sets."""
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "%s-%s" % (set_type, i)
+ text_a = tokenization.convert_to_unicode(line[3])
+ text_b = tokenization.convert_to_unicode(line[4])
+ if set_type == "test":
+ label = "0"
+ else:
+ label = tokenization.convert_to_unicode(line[0])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+
+class ColaProcessor(DataProcessor):
+ """Processor for the CoLA data set (GLUE version)."""
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
+
+ def get_test_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
+
+ def get_labels(self):
+ """See base class."""
+ return ["0", "1"]
+
+ def _create_examples(self, lines, set_type):
+ """Creates examples for the training and dev sets."""
+ examples = []
+ for (i, line) in enumerate(lines):
+ # Only the test set has a header
+ if set_type == "test" and i == 0:
+ continue
+ guid = "%s-%s" % (set_type, i)
+ if set_type == "test":
+ text_a = tokenization.convert_to_unicode(line[1])
+ label = "0"
+ else:
+ text_a = tokenization.convert_to_unicode(line[3])
+ label = tokenization.convert_to_unicode(line[1])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
+ return examples
+
+
+import dealing_dataset
+
+
+class EPProcessor(DataProcessor):
+ """Processor for the Emotion data set ."""
+
+ def get_train_examples(self, data_dir):
+ """定义开发集的数据是什么,data_dir会作为参数传进去, 这里就是加上你的文件名即可 """
+ return self._create_examples("amki_train")
+
+ def get_dev_examples(self, data_dir):
+ """定义开发集的数据是什么,data_dir会作为参数传进去,模型训练的时候会用到,这里就是加上你的文件名即可 """
+ return self._create_examples("amki_dev")
+
+ def get_test_examples(self, data_dir):
+ """定义测试集的数据是什么, 用于预测数据 ,在训练时没有用到这个函数, 这里写预测的数据集"""
+ return self._create_examples("amki_test")
+
+ def get_labels(self):
+ """ 这里是显示你一共有几个分类标签, 在此任务中我有3个标签,如实写上 标签值和 csv里面存的值相同 """
+ return [0, 1, 2]
+
+ def _create_examples(self, data_table):
+ """这个函数是用来把数据处理, 把每一个例子分成3个部分,填入到InputExample的3个参数
+ text_a 是 第一个句子的文本
+ text_b 是 第二个句子的文本 但是由于此任务是单句分类, 所以 这里传入为None
+ guid 是一个二元组 第一个表示此数据是什么数据集类型(train dev test) 第二个表示数据标号
+ label 表示句子类别
+ """
+ examples = []
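+ # Each row yielded by dealing_dataset.create_dataset_ep is indexed below as (guid, label, text).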
+ for column in dealing_dataset.create_dataset_ep(data_table):
+ # Add the example
+ examples.append(
+ InputExample(guid=column[0], text_a=column[2], text_b=None, label=column[1]))
+
+ return examples
+
+
+class EPBPTProcessor(DataProcessor):
+ """Processor for the Emotion data set ."""
+
+ def get_train_examples(self, data_dir):
+ """定义开发集的数据是什么,data_dir会作为参数传进去, 这里就是加上你的文件名即可 """
+ return self._create_examples("amki_train")
+
+ def get_dev_examples(self, data_dir):
+ """定义开发集的数据是什么,data_dir会作为参数传进去,模型训练的时候会用到,这里就是加上你的文件名即可 """
+ return self._create_examples("amki_dev")
+
+ def get_test_examples(self, data_dir):
+ """定义测试集的数据是什么, 用于预测数据 ,在训练时没有用到这个函数, 这里写预测的数据集"""
+ return self._create_examples("amki_test")
+
+ def get_labels(self):
+ """ 这里是显示你一共有几个分类标签, 在此任务中我有3个标签,如实写上 标签值和 csv里面存的值相同 """
+ return [0, 1, 2]
+
+ def _create_examples(self, data_table):
+ """这个函数是用来把数据处理, 把每一个例子分成3个部分,填入到InputExample的3个参数
+ text_a 是 第一个句子的文本
+ text_b 是 第二个句子的文本 但是由于此任务是单句分类, 所以 这里传入为None
+ guid 是一个二元组 第一个表示此数据是什么数据集类型(train dev test) 第二个表示数据标号
+ label 表示句子类别
+ """
+ examples = []
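+ # Note: data_table is unused here; create_dataset_pdt() loads the pdt dataset directly,
+ # and each row is indexed below as (guid, label, text).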
+ for column in dealing_dataset.create_dataset_pdt():
+ # Add the example
+ examples.append(
+ InputExample(guid=column[0], text_a=column[2], text_b=None, label=column[1]))
+
+ return examples
+
+
+def convert_single_example(ex_index, example, label_list, max_seq_length,
+ tokenizer):
+ """Converts a single `InputExample` into a single `InputFeatures`."""
+
+ if isinstance(example, PaddingInputExample):
+ return InputFeatures(
+ input_ids=[0] * max_seq_length,
+ input_mask=[0] * max_seq_length,
+ segment_ids=[0] * max_seq_length,
+ label_id=0,
+ is_real_example=False)
+
+ label_map = {}
+ for (i, label) in enumerate(label_list):
+ label_map[label] = i
+
+ tokens_a = tokenizer.tokenize(example.text_a)
+ tokens_b = None
+ if example.text_b:
+ tokens_b = tokenizer.tokenize(example.text_b)
+
+ if tokens_b:
+ # Modifies `tokens_a` and `tokens_b` in place so that the total
+ # length is less than the specified length.
+ # Account for [CLS], [SEP], [SEP] with "- 3"
+ _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
+ else:
+ # Account for [CLS] and [SEP] with "- 2"
+ if len(tokens_a) > max_seq_length - 2:
+ tokens_a = tokens_a[0:(max_seq_length - 2)]
+
+ # The convention in BERT is:
+ # (a) For sequence pairs:
+ # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
+ # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1
+ # (b) For single sequences:
+ # tokens: [CLS] the dog is hairy . [SEP]
+ # type_ids: 0 0 0 0 0 0 0
+ #
+ # Where "type_ids" are used to indicate whether this is the first
+ # sequence or the second sequence. The embedding vectors for `type=0` and
+ # `type=1` were learned during pre-training and are added to the wordpiece
+ # embedding vector (and position vector). This is not *strictly* necessary
+ # since the [SEP] token unambiguously separates the sequences, but it makes
+ # it easier for the model to learn the concept of sequences.
+ #
+ # For classification tasks, the first vector (corresponding to [CLS]) is
+ # used as the "sentence vector". Note that this only makes sense because
+ # the entire model is fine-tuned.
+ tokens = []
+ segment_ids = []
+ tokens.append("[CLS]")
+ segment_ids.append(0)
+ for token in tokens_a:
+ tokens.append(token)
+ segment_ids.append(0)
+ tokens.append("[SEP]")
+ segment_ids.append(0)
+
+ if tokens_b:
+ for token in tokens_b:
+ tokens.append(token)
+ segment_ids.append(1)
+ tokens.append("[SEP]")
+ segment_ids.append(1)
+
+ input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+ # The mask has 1 for real tokens and 0 for padding tokens. Only real
+ # tokens are attended to.
+ input_mask = [1] * len(input_ids)
+
+ # Zero-pad up to the sequence length.
+ while len(input_ids) < max_seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ segment_ids.append(0)
+
+ assert len(input_ids) == max_seq_length
+ assert len(input_mask) == max_seq_length
+ assert len(segment_ids) == max_seq_length
+
+ label_id = label_map[example.label]
+ if ex_index < 5:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("guid: %s" % (example.guid))
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in tokens]))
+ tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ tf.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+ tf.logging.info("label: %s (id = %d)" % (example.label, label_id))
+
+ feature = InputFeatures(
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ label_id=label_id,
+ is_real_example=True)
+ return feature
+
+
+def file_based_convert_examples_to_features(
+ examples, label_list, max_seq_length, tokenizer, output_file):
+ """Convert a set of `InputExample`s to a TFRecord file."""
+
+ writer = tf.python_io.TFRecordWriter(output_file)
+
+ for (ex_index, example) in enumerate(examples):
+ if ex_index % 10000 == 0:
+ tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))
+
+ feature = convert_single_example(ex_index, example, label_list,
+ max_seq_length, tokenizer)
+
+ def create_int_feature(values):
+ f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
+ return f
+
+ features = collections.OrderedDict()
+ features["input_ids"] = create_int_feature(feature.input_ids)
+ features["input_mask"] = create_int_feature(feature.input_mask)
+ features["segment_ids"] = create_int_feature(feature.segment_ids)
+ features["label_ids"] = create_int_feature([feature.label_id])
+ features["is_real_example"] = create_int_feature(
+ [int(feature.is_real_example)])
+
+ tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+ writer.write(tf_example.SerializeToString())
+ writer.close()
+
+
+def file_based_input_fn_builder(input_file, seq_length, is_training,
+ drop_remainder):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ name_to_features = {
+ "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
+ "input_mask": tf.FixedLenFeature([seq_length], tf.int64),
+ "segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
+ "label_ids": tf.FixedLenFeature([], tf.int64),
+ "is_real_example": tf.FixedLenFeature([], tf.int64),
+ }
+
+ def _decode_record(record, name_to_features):
+ """Decodes a record to a TensorFlow example."""
+ example = tf.parse_single_example(record, name_to_features)
+
+ # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
+ # So cast all int64 to int32.
+ for name in list(example.keys()):
+ t = example[name]
+ if t.dtype == tf.int64:
+ t = tf.to_int32(t)
+ example[name] = t
+
+ return example
+
+ def input_fn(params):
+ """The actual input function."""
+ batch_size = params["batch_size"]
+
+ # For training, we want a lot of parallel reading and shuffling.
+ # For eval, we want no shuffling and parallel reading doesn't matter.
+ d = tf.data.TFRecordDataset(input_file)
+ if is_training:
+ d = d.repeat()
+ d = d.shuffle(buffer_size=100)
+
+ d = d.apply(
+ tf.contrib.data.map_and_batch(
+ lambda record: _decode_record(record, name_to_features),
+ batch_size=batch_size,
+ drop_remainder=drop_remainder))
+
+ return d
+
+ return input_fn
+
+
+def _truncate_seq_pair(tokens_a, tokens_b, max_length):
+ """Truncates a sequence pair in place to the maximum length."""
+
+ # This is a simple heuristic which will always truncate the longer sequence
+ # one token at a time. This makes more sense than truncating an equal percent
+ # of tokens from each, since if one sequence is very short then each token
+ # that's truncated likely contains more information than a longer sequence.
+ while True:
+ total_length = len(tokens_a) + len(tokens_b)
+ if total_length <= max_length:
+ break
+ if len(tokens_a) > len(tokens_b):
+ tokens_a.pop()
+ else:
+ tokens_b.pop()
+
+
+def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
+ labels, num_labels, use_one_hot_embeddings):
+ """Creates a classification model."""
+ model = modeling.BertModel(
+ config=bert_config,
+ is_training=is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=segment_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ # In the demo, we are doing a simple classification task on the entire
+ # segment.
+ #
+ # If you want to use the token-level output, use model.get_sequence_output()
+ # instead.
+ output_layer = model.get_pooled_output()
+
+ hidden_size = output_layer.shape[-1].value
+
+ output_weights = tf.get_variable(
+ "output_weights", [num_labels, hidden_size],
+ initializer=tf.truncated_normal_initializer(stddev=0.02))
+
+ output_bias = tf.get_variable(
+ "output_bias", [num_labels], initializer=tf.zeros_initializer())
+
+ with tf.variable_scope("loss"):
+ if is_training:
+ # I.e., 0.1 dropout
+ output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
+
+ logits = tf.matmul(output_layer, output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+ probabilities = tf.nn.softmax(logits, axis=-1)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+
+ one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
+
+ per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
+ loss = tf.reduce_mean(per_example_loss)
+
+ return (loss, per_example_loss, logits, probabilities)
+
+
+def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
+ num_train_steps, num_warmup_steps, use_tpu,
+ use_one_hot_embeddings):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ tf.logging.info("*** Features ***")
+ for name in sorted(features.keys()):
+ tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ segment_ids = features["segment_ids"]
+ label_ids = features["label_ids"]
+ is_real_example = None
+ if "is_real_example" in features:
+ is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
+ else:
+ is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
+
+ is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+
+ (total_loss, per_example_loss, logits, probabilities) = create_model(
+ bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
+ num_labels, use_one_hot_embeddings)
+
+ tvars = tf.trainable_variables()
+ initialized_variable_names = {}
+ scaffold_fn = None
+ if init_checkpoint:
+ (assignment_map, initialized_variable_names
+ ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
+ if use_tpu:
+
+ def tpu_scaffold():
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+ return tf.train.Scaffold()
+
+ scaffold_fn = tpu_scaffold
+ else:
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+
+ tf.logging.info("**** Trainable Variables ****")
+ for var in tvars:
+ init_string = ""
+ if var.name in initialized_variable_names:
+ init_string = ", *INIT_FROM_CKPT*"
+ tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+ init_string)
+
+ output_spec = None
+ if mode == tf.estimator.ModeKeys.TRAIN:
+
+ train_op = optimization.create_optimizer(
+ total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
+
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ train_op=train_op,
+ scaffold_fn=scaffold_fn)
+ elif mode == tf.estimator.ModeKeys.EVAL:
+
+ def metric_fn(per_example_loss, label_ids, logits, is_real_example):
+ predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
+ accuracy = tf.metrics.accuracy(
+ labels=label_ids, predictions=predictions, weights=is_real_example)
+ loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
+ return {
+ "eval_accuracy": accuracy,
+ "eval_loss": loss,
+ }
+
+ eval_metrics = (metric_fn,
+ [per_example_loss, label_ids, logits, is_real_example])
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ eval_metrics=eval_metrics,
+ scaffold_fn=scaffold_fn)
+ else:
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ predictions={"probabilities": probabilities},
+ scaffold_fn=scaffold_fn)
+ return output_spec
+
+ return model_fn
+
+
+# This function is not used by this file but is still used by the Colab and
+# people who depend on it.
+def input_fn_builder(features, seq_length, is_training, drop_remainder):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ all_input_ids = []
+ all_input_mask = []
+ all_segment_ids = []
+ all_label_ids = []
+
+ for feature in features:
+ all_input_ids.append(feature.input_ids)
+ all_input_mask.append(feature.input_mask)
+ all_segment_ids.append(feature.segment_ids)
+ all_label_ids.append(feature.label_id)
+
+ def input_fn(params):
+ """The actual input function."""
+ batch_size = params["batch_size"]
+
+ num_examples = len(features)
+
+ # This is for demo purposes and does NOT scale to large data sets. We do
+ # not use Dataset.from_generator() because that uses tf.py_func which is
+ # not TPU compatible. The right way to load data is with TFRecordReader.
+ d = tf.data.Dataset.from_tensor_slices({
+ "input_ids":
+ tf.constant(
+ all_input_ids, shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "input_mask":
+ tf.constant(
+ all_input_mask,
+ shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "segment_ids":
+ tf.constant(
+ all_segment_ids,
+ shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "label_ids":
+ tf.constant(all_label_ids, shape=[num_examples], dtype=tf.int32),
+ })
+
+ if is_training:
+ d = d.repeat()
+ d = d.shuffle(buffer_size=100)
+
+ d = d.batch(batch_size=batch_size, drop_remainder=drop_remainder)
+ return d
+
+ return input_fn
+
+
+# This function is not used by this file but is still used by the Colab and
+# people who depend on it.
+def convert_examples_to_features(examples, label_list, max_seq_length,
+ tokenizer):
+ """Convert a set of `InputExample`s to a list of `InputFeatures`."""
+
+ features = []
+ for (ex_index, example) in enumerate(examples):
+ if ex_index % 10000 == 0:
+ tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))
+
+ feature = convert_single_example(ex_index, example, label_list,
+ max_seq_length, tokenizer)
+
+ features.append(feature)
+ return features
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ processors = {
+ "cola": ColaProcessor,
+ "mnli": MnliProcessor,
+ "mrpc": MrpcProcessor,
+ "xnli": XnliProcessor,
+ "ep": EPProcessor,
+ "eppdt": EPBPTProcessor,
+ }
+
+ tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
+ FLAGS.init_checkpoint)
+
+ if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
+ raise ValueError(
+ "At least one of `do_train`, `do_eval` or `do_predict' must be True.")
+
+ bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
+
+ if FLAGS.max_seq_length > bert_config.max_position_embeddings:
+ raise ValueError(
+ "Cannot use sequence length %d because the BERT model "
+ "was only trained up to sequence length %d" %
+ (FLAGS.max_seq_length, bert_config.max_position_embeddings))
+
+ tf.gfile.MakeDirs(FLAGS.output_dir)
+
+ task_name = FLAGS.task_name.lower()
+
+ if task_name not in processors:
+ raise ValueError("Task not found: %s" % (task_name))
+
+ processor = processors[task_name]()
+
+ label_list = processor.get_labels()
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+ tpu_cluster_resolver = None
+ if FLAGS.use_tpu and FLAGS.tpu_name:
+ tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
+ FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ run_config = tf.contrib.tpu.RunConfig(
+ cluster=tpu_cluster_resolver,
+ master=FLAGS.master,
+ model_dir=FLAGS.output_dir,
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps,
+ tpu_config=tf.contrib.tpu.TPUConfig(
+ iterations_per_loop=FLAGS.iterations_per_loop,
+ num_shards=FLAGS.num_tpu_cores,
+ per_host_input_for_training=is_per_host))
+
+ train_examples = None
+ num_train_steps = None
+ num_warmup_steps = None
+ if FLAGS.do_train:
+ train_examples = processor.get_train_examples(FLAGS.data_dir)
+ num_train_steps = int(
+ len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
+ num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
+
+ model_fn = model_fn_builder(
+ bert_config=bert_config,
+ num_labels=len(label_list),
+ init_checkpoint=FLAGS.init_checkpoint,
+ learning_rate=FLAGS.learning_rate,
+ num_train_steps=num_train_steps,
+ num_warmup_steps=num_warmup_steps,
+ use_tpu=FLAGS.use_tpu,
+ use_one_hot_embeddings=FLAGS.use_tpu)
+
+ # If TPU is not available, this will fall back to normal Estimator on CPU
+ # or GPU.
+ estimator = tf.contrib.tpu.TPUEstimator(
+ use_tpu=FLAGS.use_tpu,
+ model_fn=model_fn,
+ config=run_config,
+ train_batch_size=FLAGS.train_batch_size,
+ eval_batch_size=FLAGS.eval_batch_size,
+ predict_batch_size=FLAGS.predict_batch_size)
+
+ if FLAGS.do_train:
+ train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
+ file_based_convert_examples_to_features(
+ train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)
+ tf.logging.info("***** Running training *****")
+ tf.logging.info(" Num examples = %d", len(train_examples))
+ tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
+ tf.logging.info(" Num steps = %d", num_train_steps)
+ train_input_fn = file_based_input_fn_builder(
+ input_file=train_file,
+ seq_length=FLAGS.max_seq_length,
+ is_training=True,
+ drop_remainder=True)
+ estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
+
+ if FLAGS.do_eval:
+ eval_examples = processor.get_dev_examples(FLAGS.data_dir)
+ num_actual_eval_examples = len(eval_examples)
+ if FLAGS.use_tpu:
+ # TPU requires a fixed batch size for all batches, therefore the number
+ # of examples must be a multiple of the batch size, or else examples
+ # will get dropped. So we pad with fake examples which are ignored
+ # later on. These do NOT count towards the metric (all tf.metrics
+ # support a per-instance weight, and these get a weight of 0.0).
+ while len(eval_examples) % FLAGS.eval_batch_size != 0:
+ eval_examples.append(PaddingInputExample())
+
+ eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
+ file_based_convert_examples_to_features(
+ eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)
+
+ tf.logging.info("***** Running evaluation *****")
+ tf.logging.info(" Num examples = %d (%d actual, %d padding)",
+ len(eval_examples), num_actual_eval_examples,
+ len(eval_examples) - num_actual_eval_examples)
+ tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
+
+ # This tells the estimator to run through the entire set.
+ eval_steps = None
+ # However, if running eval on the TPU, you will need to specify the
+ # number of steps.
+ if FLAGS.use_tpu:
+ assert len(eval_examples) % FLAGS.eval_batch_size == 0
+ eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)
+
+ eval_drop_remainder = True if FLAGS.use_tpu else False
+ eval_input_fn = file_based_input_fn_builder(
+ input_file=eval_file,
+ seq_length=FLAGS.max_seq_length,
+ is_training=False,
+ drop_remainder=eval_drop_remainder)
+
+ result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
+
+ output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
+ with tf.gfile.GFile(output_eval_file, "w") as writer:
+ tf.logging.info("***** Eval results *****")
+ for key in sorted(result.keys()):
+ tf.logging.info(" %s = %s", key, str(result[key]))
+ writer.write("%s = %s\n" % (key, str(result[key])))
+
+ if FLAGS.do_predict:
+ predict_examples = processor.get_test_examples(FLAGS.data_dir)
+ num_actual_predict_examples = len(predict_examples)
+ if FLAGS.use_tpu:
+ # TPU requires a fixed batch size for all batches, therefore the number
+ # of examples must be a multiple of the batch size, or else examples
+ # will get dropped. So we pad with fake examples which are ignored
+ # later on.
+ while len(predict_examples) % FLAGS.predict_batch_size != 0:
+ predict_examples.append(PaddingInputExample())
+
+ predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
+ file_based_convert_examples_to_features(predict_examples, label_list,
+ FLAGS.max_seq_length, tokenizer,
+ predict_file)
+
+ tf.logging.info("***** Running prediction*****")
+ tf.logging.info(" Num examples = %d (%d actual, %d padding)",
+ len(predict_examples), num_actual_predict_examples,
+ len(predict_examples) - num_actual_predict_examples)
+ tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)
+
+ predict_drop_remainder = True if FLAGS.use_tpu else False
+ predict_input_fn = file_based_input_fn_builder(
+ input_file=predict_file,
+ seq_length=FLAGS.max_seq_length,
+ is_training=False,
+ drop_remainder=predict_drop_remainder)
+
+ result = estimator.predict(input_fn=predict_input_fn)
+
+ output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
+ with tf.gfile.GFile(output_predict_file, "w") as writer:
+ num_written_lines = 0
+ tf.logging.info("***** Predict results *****")
+ for (i, prediction) in enumerate(result):
+ probabilities = prediction["probabilities"]
+ if i >= num_actual_predict_examples:
+ break
+ output_line = "\t".join(
+ str(class_probability)
+ for class_probability in probabilities) + "\n"
+ writer.write(output_line)
+ num_written_lines += 1
+ assert num_written_lines == num_actual_predict_examples
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("data_dir")
+ flags.mark_flag_as_required("task_name")
+ flags.mark_flag_as_required("vocab_file")
+ flags.mark_flag_as_required("bert_config_file")
+ flags.mark_flag_as_required("output_dir")
+ tf.app.run()
diff --git a/run_classifier_with_tfhub.py b/run_classifier_with_tfhub.py
new file mode 100644
index 0000000..9d2f80f
--- /dev/null
+++ b/run_classifier_with_tfhub.py
@@ -0,0 +1,314 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT finetuning runner with TF-Hub."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import optimization
+import run_classifier
+import tokenization
+import tensorflow as tf
+import tensorflow_hub as hub
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string(
+ "bert_hub_module_handle", None,
+ "Handle for the BERT TF-Hub module.")
+
+
+def create_model(is_training, input_ids, input_mask, segment_ids, labels,
+ num_labels, bert_hub_module_handle):
+ """Creates a classification model."""
+ tags = set()
+ if is_training:
+ tags.add("train")
+ bert_module = hub.Module(bert_hub_module_handle, tags=tags, trainable=True)
+ bert_inputs = dict(
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids)
+ bert_outputs = bert_module(
+ inputs=bert_inputs,
+ signature="tokens",
+ as_dict=True)
+
+ # In the demo, we are doing a simple classification task on the entire
+ # segment.
+ #
+ # If you want to use the token-level output, use
+ # bert_outputs["sequence_output"] instead.
+ output_layer = bert_outputs["pooled_output"]
+
+ hidden_size = output_layer.shape[-1].value
+
+ output_weights = tf.get_variable(
+ "output_weights", [num_labels, hidden_size],
+ initializer=tf.truncated_normal_initializer(stddev=0.02))
+
+ output_bias = tf.get_variable(
+ "output_bias", [num_labels], initializer=tf.zeros_initializer())
+
+ with tf.variable_scope("loss"):
+ if is_training:
+ # I.e., 0.1 dropout
+ output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
+
+ logits = tf.matmul(output_layer, output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+ probabilities = tf.nn.softmax(logits, axis=-1)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+
+ one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
+
+ per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
+ loss = tf.reduce_mean(per_example_loss)
+
+ return (loss, per_example_loss, logits, probabilities)
+
+
+def model_fn_builder(num_labels, learning_rate, num_train_steps,
+ num_warmup_steps, use_tpu, bert_hub_module_handle):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ tf.logging.info("*** Features ***")
+ for name in sorted(features.keys()):
+ tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ segment_ids = features["segment_ids"]
+ label_ids = features["label_ids"]
+
+ is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+
+ (total_loss, per_example_loss, logits, probabilities) = create_model(
+ is_training, input_ids, input_mask, segment_ids, label_ids, num_labels,
+ bert_hub_module_handle)
+
+ output_spec = None
+ if mode == tf.estimator.ModeKeys.TRAIN:
+ train_op = optimization.create_optimizer(
+ total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
+
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ train_op=train_op)
+ elif mode == tf.estimator.ModeKeys.EVAL:
+
+ def metric_fn(per_example_loss, label_ids, logits):
+ predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
+ accuracy = tf.metrics.accuracy(label_ids, predictions)
+ loss = tf.metrics.mean(per_example_loss)
+ return {
+ "eval_accuracy": accuracy,
+ "eval_loss": loss,
+ }
+
+ eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ eval_metrics=eval_metrics)
+ elif mode == tf.estimator.ModeKeys.PREDICT:
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode, predictions={"probabilities": probabilities})
+ else:
+ raise ValueError(
+ "Only TRAIN, EVAL and PREDICT modes are supported: %s" % (mode))
+
+ return output_spec
+
+ return model_fn
+
+
+def create_tokenizer_from_hub_module(bert_hub_module_handle):
+ """Get the vocab file and casing info from the Hub module."""
+ with tf.Graph().as_default():
+ bert_module = hub.Module(bert_hub_module_handle)
+ tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
+ with tf.Session() as sess:
+ vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
+ tokenization_info["do_lower_case"]])
+ return tokenization.FullTokenizer(
+ vocab_file=vocab_file, do_lower_case=do_lower_case)
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ processors = {
+ "cola": run_classifier.ColaProcessor,
+ "mnli": run_classifier.MnliProcessor,
+ "mrpc": run_classifier.MrpcProcessor,
+ }
+
+ if not FLAGS.do_train and not FLAGS.do_eval:
+ raise ValueError("At least one of `do_train` or `do_eval` must be True.")
+
+ tf.gfile.MakeDirs(FLAGS.output_dir)
+
+ task_name = FLAGS.task_name.lower()
+
+ if task_name not in processors:
+ raise ValueError("Task not found: %s" % (task_name))
+
+ processor = processors[task_name]()
+
+ label_list = processor.get_labels()
+
+ tokenizer = create_tokenizer_from_hub_module(FLAGS.bert_hub_module_handle)
+
+ tpu_cluster_resolver = None
+ if FLAGS.use_tpu and FLAGS.tpu_name:
+ tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
+ FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ run_config = tf.contrib.tpu.RunConfig(
+ cluster=tpu_cluster_resolver,
+ master=FLAGS.master,
+ model_dir=FLAGS.output_dir,
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps,
+ tpu_config=tf.contrib.tpu.TPUConfig(
+ iterations_per_loop=FLAGS.iterations_per_loop,
+ num_shards=FLAGS.num_tpu_cores,
+ per_host_input_for_training=is_per_host))
+
+ train_examples = None
+ num_train_steps = None
+ num_warmup_steps = None
+ if FLAGS.do_train:
+ train_examples = processor.get_train_examples(FLAGS.data_dir)
+ num_train_steps = int(
+ len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
+ num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
+
+ model_fn = model_fn_builder(
+ num_labels=len(label_list),
+ learning_rate=FLAGS.learning_rate,
+ num_train_steps=num_train_steps,
+ num_warmup_steps=num_warmup_steps,
+ use_tpu=FLAGS.use_tpu,
+ bert_hub_module_handle=FLAGS.bert_hub_module_handle)
+
+ # If TPU is not available, this will fall back to normal Estimator on CPU
+ # or GPU.
+ estimator = tf.contrib.tpu.TPUEstimator(
+ use_tpu=FLAGS.use_tpu,
+ model_fn=model_fn,
+ config=run_config,
+ train_batch_size=FLAGS.train_batch_size,
+ eval_batch_size=FLAGS.eval_batch_size,
+ predict_batch_size=FLAGS.predict_batch_size)
+
+ if FLAGS.do_train:
+ train_features = run_classifier.convert_examples_to_features(
+ train_examples, label_list, FLAGS.max_seq_length, tokenizer)
+ tf.logging.info("***** Running training *****")
+ tf.logging.info(" Num examples = %d", len(train_examples))
+ tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
+ tf.logging.info(" Num steps = %d", num_train_steps)
+ train_input_fn = run_classifier.input_fn_builder(
+ features=train_features,
+ seq_length=FLAGS.max_seq_length,
+ is_training=True,
+ drop_remainder=True)
+ estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
+
+ if FLAGS.do_eval:
+ eval_examples = processor.get_dev_examples(FLAGS.data_dir)
+ eval_features = run_classifier.convert_examples_to_features(
+ eval_examples, label_list, FLAGS.max_seq_length, tokenizer)
+
+ tf.logging.info("***** Running evaluation *****")
+ tf.logging.info(" Num examples = %d", len(eval_examples))
+ tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
+
+ # This tells the estimator to run through the entire set.
+ eval_steps = None
+ # However, if running eval on the TPU, you will need to specify the
+ # number of steps.
+ if FLAGS.use_tpu:
+ # Eval will be slightly WRONG on the TPU because it will truncate
+ # the last batch.
+ eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size)
+
+ eval_drop_remainder = FLAGS.use_tpu
+ eval_input_fn = run_classifier.input_fn_builder(
+ features=eval_features,
+ seq_length=FLAGS.max_seq_length,
+ is_training=False,
+ drop_remainder=eval_drop_remainder)
+
+ result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
+
+ output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
+ with tf.gfile.GFile(output_eval_file, "w") as writer:
+ tf.logging.info("***** Eval results *****")
+ for key in sorted(result.keys()):
+ tf.logging.info(" %s = %s", key, str(result[key]))
+ writer.write("%s = %s\n" % (key, str(result[key])))
+
+ if FLAGS.do_predict:
+ predict_examples = processor.get_test_examples(FLAGS.data_dir)
+ if FLAGS.use_tpu:
+ # Discard batch remainder if running on TPU
+ n = len(predict_examples)
+ predict_examples = predict_examples[:(n - n % FLAGS.predict_batch_size)]
+
+ predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
+ run_classifier.file_based_convert_examples_to_features(
+ predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
+ predict_file)
+
+ tf.logging.info("***** Running prediction*****")
+ tf.logging.info(" Num examples = %d", len(predict_examples))
+ tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)
+
+ predict_input_fn = run_classifier.file_based_input_fn_builder(
+ input_file=predict_file,
+ seq_length=FLAGS.max_seq_length,
+ is_training=False,
+ drop_remainder=FLAGS.use_tpu)
+
+ result = estimator.predict(input_fn=predict_input_fn)
+
+ output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
+ with tf.gfile.GFile(output_predict_file, "w") as writer:
+ tf.logging.info("***** Predict results *****")
+ for prediction in result:
+ probabilities = prediction["probabilities"]
+ output_line = "\t".join(
+ str(class_probability)
+ for class_probability in probabilities) + "\n"
+ writer.write(output_line)
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("data_dir")
+ flags.mark_flag_as_required("task_name")
+ flags.mark_flag_as_required("bert_hub_module_handle")
+ flags.mark_flag_as_required("output_dir")
+ tf.app.run()
diff --git a/run_pretraining.py b/run_pretraining.py
new file mode 100644
index 0000000..b118f62
--- /dev/null
+++ b/run_pretraining.py
@@ -0,0 +1,493 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Run masked LM/next sentence masked_lm pre-training for BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import modeling
+import optimization
+import tensorflow as tf
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+## Required parameters
+flags.DEFINE_string(
+ "bert_config_file", None,
+ "The config json file corresponding to the pre-trained BERT model. "
+ "This specifies the model architecture.")
+
+flags.DEFINE_string(
+ "input_file", None,
+ "Input TF example files (can be a glob or comma separated).")
+
+flags.DEFINE_string(
+ "output_dir", None,
+ "The output directory where the model checkpoints will be written.")
+
+## Other parameters
+flags.DEFINE_string(
+ "init_checkpoint", None,
+ "Initial checkpoint (usually from a pre-trained BERT model).")
+
+flags.DEFINE_integer(
+ "max_seq_length", 128,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded. Must match data generation.")
+
+flags.DEFINE_integer(
+ "max_predictions_per_seq", 20,
+ "Maximum number of masked LM predictions per sequence. "
+ "Must match data generation.")
+
+flags.DEFINE_bool("do_train", False, "Whether to run training.")
+
+flags.DEFINE_bool("do_eval", False, "Whether to run eval on the dev set.")
+
+flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.")
+
+flags.DEFINE_integer("eval_batch_size", 8, "Total batch size for eval.")
+
+flags.DEFINE_float("learning_rate", 5e-5, "The initial learning rate for Adam.")
+
+flags.DEFINE_integer("num_train_steps", 100000, "Number of training steps.")
+
+flags.DEFINE_integer("num_warmup_steps", 10000, "Number of warmup steps.")
+
+flags.DEFINE_integer("save_checkpoints_steps", 1000,
+ "How often to save the model checkpoint.")
+
+flags.DEFINE_integer("iterations_per_loop", 1000,
+ "How many steps to make in each estimator call.")
+
+flags.DEFINE_integer("max_eval_steps", 100, "Maximum number of eval steps.")
+
+flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
+
+tf.flags.DEFINE_string(
+ "tpu_name", None,
+ "The Cloud TPU to use for training. This should be either the name "
+ "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 "
+ "url.")
+
+tf.flags.DEFINE_string(
+ "tpu_zone", None,
+ "[Optional] GCE zone where the Cloud TPU is located in. If not "
+ "specified, we will attempt to automatically detect the GCE project from "
+ "metadata.")
+
+tf.flags.DEFINE_string(
+ "gcp_project", None,
+ "[Optional] Project name for the Cloud TPU-enabled project. If not "
+ "specified, we will attempt to automatically detect the GCE project from "
+ "metadata.")
+
+tf.flags.DEFINE_string("master", None, "[Optional] TensorFlow master URL.")
+
+flags.DEFINE_integer(
+ "num_tpu_cores", 8,
+ "Only used if `use_tpu` is True. Total number of TPU cores to use.")
+
+
+def model_fn_builder(bert_config, init_checkpoint, learning_rate,
+ num_train_steps, num_warmup_steps, use_tpu,
+ use_one_hot_embeddings):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ tf.logging.info("*** Features ***")
+ for name in sorted(features.keys()):
+ tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ segment_ids = features["segment_ids"]
+ masked_lm_positions = features["masked_lm_positions"]
+ masked_lm_ids = features["masked_lm_ids"]
+ masked_lm_weights = features["masked_lm_weights"]
+ next_sentence_labels = features["next_sentence_labels"]
+
+ is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+
+ model = modeling.BertModel(
+ config=bert_config,
+ is_training=is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=segment_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ (masked_lm_loss,
+ masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output(
+ bert_config, model.get_sequence_output(), model.get_embedding_table(),
+ masked_lm_positions, masked_lm_ids, masked_lm_weights)
+
+ (next_sentence_loss, next_sentence_example_loss,
+ next_sentence_log_probs) = get_next_sentence_output(
+ bert_config, model.get_pooled_output(), next_sentence_labels)
+
+ total_loss = masked_lm_loss + next_sentence_loss
+
+ tvars = tf.trainable_variables()
+
+ initialized_variable_names = {}
+ scaffold_fn = None
+ if init_checkpoint:
+ (assignment_map, initialized_variable_names
+ ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
+ if use_tpu:
+
+ def tpu_scaffold():
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+ return tf.train.Scaffold()
+
+ scaffold_fn = tpu_scaffold
+ else:
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+
+ tf.logging.info("**** Trainable Variables ****")
+ for var in tvars:
+ init_string = ""
+ if var.name in initialized_variable_names:
+ init_string = ", *INIT_FROM_CKPT*"
+ tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+ init_string)
+
+ output_spec = None
+ if mode == tf.estimator.ModeKeys.TRAIN:
+ train_op = optimization.create_optimizer(
+ total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
+
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ train_op=train_op,
+ scaffold_fn=scaffold_fn)
+ elif mode == tf.estimator.ModeKeys.EVAL:
+
+ def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
+ masked_lm_weights, next_sentence_example_loss,
+ next_sentence_log_probs, next_sentence_labels):
+ """Computes the loss and accuracy of the model."""
+ masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
+ [-1, masked_lm_log_probs.shape[-1]])
+ masked_lm_predictions = tf.argmax(
+ masked_lm_log_probs, axis=-1, output_type=tf.int32)
+ masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
+ masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
+ masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
+ masked_lm_accuracy = tf.metrics.accuracy(
+ labels=masked_lm_ids,
+ predictions=masked_lm_predictions,
+ weights=masked_lm_weights)
+ masked_lm_mean_loss = tf.metrics.mean(
+ values=masked_lm_example_loss, weights=masked_lm_weights)
+
+ next_sentence_log_probs = tf.reshape(
+ next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
+ next_sentence_predictions = tf.argmax(
+ next_sentence_log_probs, axis=-1, output_type=tf.int32)
+ next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
+ next_sentence_accuracy = tf.metrics.accuracy(
+ labels=next_sentence_labels, predictions=next_sentence_predictions)
+ next_sentence_mean_loss = tf.metrics.mean(
+ values=next_sentence_example_loss)
+
+ return {
+ "masked_lm_accuracy": masked_lm_accuracy,
+ "masked_lm_loss": masked_lm_mean_loss,
+ "next_sentence_accuracy": next_sentence_accuracy,
+ "next_sentence_loss": next_sentence_mean_loss,
+ }
+
+ eval_metrics = (metric_fn, [
+ masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
+ masked_lm_weights, next_sentence_example_loss,
+ next_sentence_log_probs, next_sentence_labels
+ ])
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ eval_metrics=eval_metrics,
+ scaffold_fn=scaffold_fn)
+ else:
+ raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))
+
+ return output_spec
+
+ return model_fn
+
+
+def get_masked_lm_output(bert_config, input_tensor, output_weights, positions,
+ label_ids, label_weights):
+ """Get loss and log probs for the masked LM."""
+ input_tensor = gather_indexes(input_tensor, positions)
+
+ with tf.variable_scope("cls/predictions"):
+ # We apply one more non-linear transformation before the output layer.
+ # This matrix is not used after pre-training.
+ with tf.variable_scope("transform"):
+ input_tensor = tf.layers.dense(
+ input_tensor,
+ units=bert_config.hidden_size,
+ activation=modeling.get_activation(bert_config.hidden_act),
+ kernel_initializer=modeling.create_initializer(
+ bert_config.initializer_range))
+ input_tensor = modeling.layer_norm(input_tensor)
+
+ # The output weights are the same as the input embeddings, but there is
+ # an output-only bias for each token.
+ output_bias = tf.get_variable(
+ "output_bias",
+ shape=[bert_config.vocab_size],
+ initializer=tf.zeros_initializer())
+ logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+
+ label_ids = tf.reshape(label_ids, [-1])
+ label_weights = tf.reshape(label_weights, [-1])
+
+ one_hot_labels = tf.one_hot(
+ label_ids, depth=bert_config.vocab_size, dtype=tf.float32)
+
+ # The `positions` tensor might be zero-padded (if the sequence is too
+ # short to have the maximum number of predictions). The `label_weights`
+ # tensor has a value of 1.0 for every real prediction and 0.0 for the
+ # padding predictions.
+ per_example_loss = -tf.reduce_sum(log_probs * one_hot_labels, axis=[-1])
+ numerator = tf.reduce_sum(label_weights * per_example_loss)
+ denominator = tf.reduce_sum(label_weights) + 1e-5
+ loss = numerator / denominator
+
+ return (loss, per_example_loss, log_probs)
+
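+# Worked example of the weighted masked-LM loss above: with per_example_loss
+# values [2.0, 1.0, 3.0] and label_weights [1.0, 1.0, 0.0] (the last slot is
+# padding), numerator = 2.0 + 1.0 = 3.0 and denominator = 2.0 + 1e-5, so the
+# loss is ~1.5 and padded prediction slots contribute nothing.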
+
+def get_next_sentence_output(bert_config, input_tensor, labels):
+ """Get loss and log probs for the next sentence prediction."""
+
+ # Simple binary classification. Note that 0 is "next sentence" and 1 is
+ # "random sentence". This weight matrix is not used after pre-training.
+ with tf.variable_scope("cls/seq_relationship"):
+ output_weights = tf.get_variable(
+ "output_weights",
+ shape=[2, bert_config.hidden_size],
+ initializer=modeling.create_initializer(bert_config.initializer_range))
+ output_bias = tf.get_variable(
+ "output_bias", shape=[2], initializer=tf.zeros_initializer())
+
+ logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+ labels = tf.reshape(labels, [-1])
+ one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
+ per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
+ loss = tf.reduce_mean(per_example_loss)
+ return (loss, per_example_loss, log_probs)
+
+
+def gather_indexes(sequence_tensor, positions):
+ """Gathers the vectors at the specific positions over a minibatch."""
+ sequence_shape = modeling.get_shape_list(sequence_tensor, expected_rank=3)
+ batch_size = sequence_shape[0]
+ seq_length = sequence_shape[1]
+ width = sequence_shape[2]
+
+ flat_offsets = tf.reshape(
+ tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])
+ flat_positions = tf.reshape(positions + flat_offsets, [-1])
+ flat_sequence_tensor = tf.reshape(sequence_tensor,
+ [batch_size * seq_length, width])
+ output_tensor = tf.gather(flat_sequence_tensor, flat_positions)
+ return output_tensor
+
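+# Worked example for gather_indexes: with batch_size=2, seq_length=3 and
+# positions=[[0, 2], [1, 1]], flat_offsets is [[0], [3]] and flat_positions is
+# [0, 2, 4, 4]; these index rows of the sequence tensor flattened to shape
+# [batch_size * seq_length, width].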
+
+def input_fn_builder(input_files,
+ max_seq_length,
+ max_predictions_per_seq,
+ is_training,
+ num_cpu_threads=4):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ def input_fn(params):
+ """The actual input function."""
+ batch_size = params["batch_size"]
+
+ name_to_features = {
+ "input_ids":
+ tf.FixedLenFeature([max_seq_length], tf.int64),
+ "input_mask":
+ tf.FixedLenFeature([max_seq_length], tf.int64),
+ "segment_ids":
+ tf.FixedLenFeature([max_seq_length], tf.int64),
+ "masked_lm_positions":
+ tf.FixedLenFeature([max_predictions_per_seq], tf.int64),
+ "masked_lm_ids":
+ tf.FixedLenFeature([max_predictions_per_seq], tf.int64),
+ "masked_lm_weights":
+ tf.FixedLenFeature([max_predictions_per_seq], tf.float32),
+ "next_sentence_labels":
+ tf.FixedLenFeature([1], tf.int64),
+ }
+
+ # For training, we want a lot of parallel reading and shuffling.
+ # For eval, we want no shuffling and parallel reading doesn't matter.
+ if is_training:
+ d = tf.data.Dataset.from_tensor_slices(tf.constant(input_files))
+ d = d.repeat()
+ d = d.shuffle(buffer_size=len(input_files))
+
+ # `cycle_length` is the number of parallel files that get read.
+ cycle_length = min(num_cpu_threads, len(input_files))
+
+ # `sloppy` mode means that the interleaving is not exact. This adds
+ # even more randomness to the training pipeline.
+ d = d.apply(
+ tf.contrib.data.parallel_interleave(
+ tf.data.TFRecordDataset,
+ sloppy=is_training,
+ cycle_length=cycle_length))
+ d = d.shuffle(buffer_size=100)
+ else:
+ d = tf.data.TFRecordDataset(input_files)
+ # Since we evaluate for a fixed number of steps we don't want to encounter
+ # out-of-range exceptions.
+ d = d.repeat()
+
+ # We must `drop_remainder` on training because the TPU requires fixed
+ # size dimensions. For eval, we assume we are evaluating on the CPU or GPU
+ # and we *don't* want to drop the remainder, otherwise we won't cover
+ # every sample.
+ d = d.apply(
+ tf.contrib.data.map_and_batch(
+ lambda record: _decode_record(record, name_to_features),
+ batch_size=batch_size,
+ num_parallel_batches=num_cpu_threads,
+ drop_remainder=True))
+ return d
+
+ return input_fn
+
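+# Note on the training branch of input_fn above: with the default
+# num_cpu_threads=4 and, say, 10 input files, cycle_length = min(4, 10) = 4
+# files are read in parallel, and sloppy=True lets parallel_interleave vary the
+# interleave order, adding extra randomness on top of the file- and
+# record-level shuffles.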
+
+def _decode_record(record, name_to_features):
+ """Decodes a record to a TensorFlow example."""
+ example = tf.parse_single_example(record, name_to_features)
+
+ # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
+ # So cast all int64 to int32.
+ for name in list(example.keys()):
+ t = example[name]
+ if t.dtype == tf.int64:
+ t = tf.to_int32(t)
+ example[name] = t
+
+ return example
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ if not FLAGS.do_train and not FLAGS.do_eval:
+ raise ValueError("At least one of `do_train` or `do_eval` must be True.")
+
+ bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
+
+ tf.gfile.MakeDirs(FLAGS.output_dir)
+
+ input_files = []
+ for input_pattern in FLAGS.input_file.split(","):
+ input_files.extend(tf.gfile.Glob(input_pattern))
+
+ tf.logging.info("*** Input Files ***")
+ for input_file in input_files:
+ tf.logging.info(" %s" % input_file)
+
+ tpu_cluster_resolver = None
+ if FLAGS.use_tpu and FLAGS.tpu_name:
+ tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
+ FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ run_config = tf.contrib.tpu.RunConfig(
+ cluster=tpu_cluster_resolver,
+ master=FLAGS.master,
+ model_dir=FLAGS.output_dir,
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps,
+ tpu_config=tf.contrib.tpu.TPUConfig(
+ iterations_per_loop=FLAGS.iterations_per_loop,
+ num_shards=FLAGS.num_tpu_cores,
+ per_host_input_for_training=is_per_host))
+
+ model_fn = model_fn_builder(
+ bert_config=bert_config,
+ init_checkpoint=FLAGS.init_checkpoint,
+ learning_rate=FLAGS.learning_rate,
+ num_train_steps=FLAGS.num_train_steps,
+ num_warmup_steps=FLAGS.num_warmup_steps,
+ use_tpu=FLAGS.use_tpu,
+ use_one_hot_embeddings=FLAGS.use_tpu)
+
+ # If TPU is not available, this will fall back to normal Estimator on CPU
+ # or GPU.
+ estimator = tf.contrib.tpu.TPUEstimator(
+ use_tpu=FLAGS.use_tpu,
+ model_fn=model_fn,
+ config=run_config,
+ train_batch_size=FLAGS.train_batch_size,
+ eval_batch_size=FLAGS.eval_batch_size)
+
+ if FLAGS.do_train:
+ tf.logging.info("***** Running training *****")
+ tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
+ train_input_fn = input_fn_builder(
+ input_files=input_files,
+ max_seq_length=FLAGS.max_seq_length,
+ max_predictions_per_seq=FLAGS.max_predictions_per_seq,
+ is_training=True)
+ estimator.train(input_fn=train_input_fn, max_steps=FLAGS.num_train_steps)
+
+ if FLAGS.do_eval:
+ tf.logging.info("***** Running evaluation *****")
+ tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
+
+ eval_input_fn = input_fn_builder(
+ input_files=input_files,
+ max_seq_length=FLAGS.max_seq_length,
+ max_predictions_per_seq=FLAGS.max_predictions_per_seq,
+ is_training=False)
+
+ result = estimator.evaluate(
+ input_fn=eval_input_fn, steps=FLAGS.max_eval_steps)
+
+ output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
+ with tf.gfile.GFile(output_eval_file, "w") as writer:
+ tf.logging.info("***** Eval results *****")
+ for key in sorted(result.keys()):
+ tf.logging.info(" %s = %s", key, str(result[key]))
+ writer.write("%s = %s\n" % (key, str(result[key])))
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("input_file")
+ flags.mark_flag_as_required("bert_config_file")
+ flags.mark_flag_as_required("output_dir")
+ tf.app.run()
diff --git a/run_squad.py b/run_squad.py
new file mode 100644
index 0000000..edd4c3e
--- /dev/null
+++ b/run_squad.py
@@ -0,0 +1,1283 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Run BERT on SQuAD 1.1 and SQuAD 2.0."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import json
+import math
+import os
+import random
+import modeling
+import optimization
+import tokenization
+import six
+import tensorflow as tf
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+## Required parameters
+flags.DEFINE_string(
+ "bert_config_file", None,
+ "The config json file corresponding to the pre-trained BERT model. "
+ "This specifies the model architecture.")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_string(
+ "output_dir", None,
+ "The output directory where the model checkpoints will be written.")
+
+## Other parameters
+flags.DEFINE_string("train_file", None,
+ "SQuAD json for training. E.g., train-v1.1.json")
+
+flags.DEFINE_string(
+ "predict_file", None,
+ "SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json")
+
+flags.DEFINE_string(
+ "init_checkpoint", None,
+ "Initial checkpoint (usually from a pre-trained BERT model).")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_integer(
+ "max_seq_length", 384,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded.")
+
+flags.DEFINE_integer(
+ "doc_stride", 128,
+ "When splitting up a long document into chunks, how much stride to "
+ "take between chunks.")
+
+flags.DEFINE_integer(
+ "max_query_length", 64,
+ "The maximum number of tokens for the question. Questions longer than "
+ "this will be truncated to this length.")
+
+flags.DEFINE_bool("do_train", False, "Whether to run training.")
+
+flags.DEFINE_bool("do_predict", False, "Whether to run eval on the dev set.")
+
+flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.")
+
+flags.DEFINE_integer("predict_batch_size", 8,
+ "Total batch size for predictions.")
+
+flags.DEFINE_float("learning_rate", 5e-5, "The initial learning rate for Adam.")
+
+flags.DEFINE_float("num_train_epochs", 3.0,
+ "Total number of training epochs to perform.")
+
+flags.DEFINE_float(
+ "warmup_proportion", 0.1,
+ "Proportion of training to perform linear learning rate warmup for. "
+ "E.g., 0.1 = 10% of training.")
+
+flags.DEFINE_integer("save_checkpoints_steps", 1000,
+ "How often to save the model checkpoint.")
+
+flags.DEFINE_integer("iterations_per_loop", 1000,
+ "How many steps to make in each estimator call.")
+
+flags.DEFINE_integer(
+ "n_best_size", 20,
+ "The total number of n-best predictions to generate in the "
+ "nbest_predictions.json output file.")
+
+flags.DEFINE_integer(
+ "max_answer_length", 30,
+ "The maximum length of an answer that can be generated. This is needed "
+ "because the start and end predictions are not conditioned on one another.")
+
+flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
+
+tf.flags.DEFINE_string(
+ "tpu_name", None,
+ "The Cloud TPU to use for training. This should be either the name "
+ "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 "
+ "url.")
+
+tf.flags.DEFINE_string(
+ "tpu_zone", None,
+ "[Optional] GCE zone where the Cloud TPU is located in. If not "
+ "specified, we will attempt to automatically detect the GCE project from "
+ "metadata.")
+
+tf.flags.DEFINE_string(
+ "gcp_project", None,
+ "[Optional] Project name for the Cloud TPU-enabled project. If not "
+ "specified, we will attempt to automatically detect the GCE project from "
+ "metadata.")
+
+tf.flags.DEFINE_string("master", None, "[Optional] TensorFlow master URL.")
+
+flags.DEFINE_integer(
+ "num_tpu_cores", 8,
+ "Only used if `use_tpu` is True. Total number of TPU cores to use.")
+
+flags.DEFINE_bool(
+ "verbose_logging", False,
+ "If true, all of the warnings related to data processing will be printed. "
+ "A number of warnings are expected for a normal SQuAD evaluation.")
+
+flags.DEFINE_bool(
+ "version_2_with_negative", False,
+ "If true, the SQuAD examples contain some that do not have an answer.")
+
+flags.DEFINE_float(
+ "null_score_diff_threshold", 0.0,
+ "If null_score - best_non_null is greater than the threshold predict null.")
+
+
+class SquadExample(object):
+ """A single training/test example for simple sequence classification.
+
+ For examples without an answer, the start and end position are -1.
+ """
+
+ def __init__(self,
+ qas_id,
+ question_text,
+ doc_tokens,
+ orig_answer_text=None,
+ start_position=None,
+ end_position=None,
+ is_impossible=False):
+ self.qas_id = qas_id
+ self.question_text = question_text
+ self.doc_tokens = doc_tokens
+ self.orig_answer_text = orig_answer_text
+ self.start_position = start_position
+ self.end_position = end_position
+ self.is_impossible = is_impossible
+
+ def __str__(self):
+ return self.__repr__()
+
+ def __repr__(self):
+ s = ""
+ s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
+ s += ", question_text: %s" % (
+ tokenization.printable_text(self.question_text))
+ s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
+ if self.start_position:
+ s += ", start_position: %d" % (self.start_position)
+ if self.end_position:
+ s += ", end_position: %d" % (self.end_position)
+ if self.is_impossible:
+ s += ", is_impossible: %r" % (self.is_impossible)
+ return s
+
+
+class InputFeatures(object):
+ """A single set of features of data."""
+
+ def __init__(self,
+ unique_id,
+ example_index,
+ doc_span_index,
+ tokens,
+ token_to_orig_map,
+ token_is_max_context,
+ input_ids,
+ input_mask,
+ segment_ids,
+ start_position=None,
+ end_position=None,
+ is_impossible=None):
+ self.unique_id = unique_id
+ self.example_index = example_index
+ self.doc_span_index = doc_span_index
+ self.tokens = tokens
+ self.token_to_orig_map = token_to_orig_map
+ self.token_is_max_context = token_is_max_context
+ self.input_ids = input_ids
+ self.input_mask = input_mask
+ self.segment_ids = segment_ids
+ self.start_position = start_position
+ self.end_position = end_position
+ self.is_impossible = is_impossible
+
+
+def read_squad_examples(input_file, is_training):
+ """Read a SQuAD json file into a list of SquadExample."""
+ with tf.gfile.Open(input_file, "r") as reader:
+ input_data = json.load(reader)["data"]
+
+ def is_whitespace(c):
+ if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
+ return True
+ return False
+
+ examples = []
+ for entry in input_data:
+ for paragraph in entry["paragraphs"]:
+ paragraph_text = paragraph["context"]
+ doc_tokens = []
+ char_to_word_offset = []
+ prev_is_whitespace = True
+ for c in paragraph_text:
+ if is_whitespace(c):
+ prev_is_whitespace = True
+ else:
+ if prev_is_whitespace:
+ doc_tokens.append(c)
+ else:
+ doc_tokens[-1] += c
+ prev_is_whitespace = False
+ char_to_word_offset.append(len(doc_tokens) - 1)
+
+ for qa in paragraph["qas"]:
+ qas_id = qa["id"]
+ question_text = qa["question"]
+ start_position = None
+ end_position = None
+ orig_answer_text = None
+ is_impossible = False
+ if is_training:
+
+ if FLAGS.version_2_with_negative:
+ is_impossible = qa["is_impossible"]
+ if (len(qa["answers"]) != 1) and (not is_impossible):
+ raise ValueError(
+ "For training, each question should have exactly 1 answer.")
+ if not is_impossible:
+ answer = qa["answers"][0]
+ orig_answer_text = answer["text"]
+ answer_offset = answer["answer_start"]
+ answer_length = len(orig_answer_text)
+ start_position = char_to_word_offset[answer_offset]
+ end_position = char_to_word_offset[answer_offset + answer_length -
+ 1]
+ # Only add answers where the text can be exactly recovered from the
+ # document. If this CAN'T happen it's likely due to weird Unicode
+ # stuff so we will just skip the example.
+ #
+ # Note that this means for training mode, every example is NOT
+ # guaranteed to be preserved.
+ actual_text = " ".join(
+ doc_tokens[start_position:(end_position + 1)])
+ cleaned_answer_text = " ".join(
+ tokenization.whitespace_tokenize(orig_answer_text))
+ if actual_text.find(cleaned_answer_text) == -1:
+ tf.logging.warning("Could not find answer: '%s' vs. '%s'",
+ actual_text, cleaned_answer_text)
+ continue
+ else:
+ start_position = -1
+ end_position = -1
+ orig_answer_text = ""
+
+ example = SquadExample(
+ qas_id=qas_id,
+ question_text=question_text,
+ doc_tokens=doc_tokens,
+ orig_answer_text=orig_answer_text,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=is_impossible)
+ examples.append(example)
+
+ return examples
+
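+# Worked example of the whitespace tokenization above: for the context
+# "The cat sat", doc_tokens becomes ["The", "cat", "sat"] and
+# char_to_word_offset becomes [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2], so an
+# answer_start of 8 maps to word index 2 ("sat").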
+
+def convert_examples_to_features(examples, tokenizer, max_seq_length,
+ doc_stride, max_query_length, is_training,
+ output_fn):
+ """Loads a data file into a list of `InputBatch`s."""
+
+ unique_id = 1000000000
+
+ for (example_index, example) in enumerate(examples):
+ query_tokens = tokenizer.tokenize(example.question_text)
+
+ if len(query_tokens) > max_query_length:
+ query_tokens = query_tokens[0:max_query_length]
+
+ tok_to_orig_index = []
+ orig_to_tok_index = []
+ all_doc_tokens = []
+ for (i, token) in enumerate(example.doc_tokens):
+ orig_to_tok_index.append(len(all_doc_tokens))
+ sub_tokens = tokenizer.tokenize(token)
+ for sub_token in sub_tokens:
+ tok_to_orig_index.append(i)
+ all_doc_tokens.append(sub_token)
+
+ tok_start_position = None
+ tok_end_position = None
+ if is_training and example.is_impossible:
+ tok_start_position = -1
+ tok_end_position = -1
+ if is_training and not example.is_impossible:
+ tok_start_position = orig_to_tok_index[example.start_position]
+ if example.end_position < len(example.doc_tokens) - 1:
+ tok_end_position = orig_to_tok_index[example.end_position + 1] - 1
+ else:
+ tok_end_position = len(all_doc_tokens) - 1
+ (tok_start_position, tok_end_position) = _improve_answer_span(
+ all_doc_tokens, tok_start_position, tok_end_position, tokenizer,
+ example.orig_answer_text)
+
+ # The -3 accounts for [CLS], [SEP] and [SEP]
+ max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
+
+ # We can have documents that are longer than the maximum sequence length.
+ # To deal with this we do a sliding window approach, where we take chunks
+ # of up to our max length with a stride of `doc_stride`.
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name
+ "DocSpan", ["start", "length"])
+ doc_spans = []
+ start_offset = 0
+ while start_offset < len(all_doc_tokens):
+ length = len(all_doc_tokens) - start_offset
+ if length > max_tokens_for_doc:
+ length = max_tokens_for_doc
+ doc_spans.append(_DocSpan(start=start_offset, length=length))
+ if start_offset + length == len(all_doc_tokens):
+ break
+ start_offset += min(length, doc_stride)
+
+ for (doc_span_index, doc_span) in enumerate(doc_spans):
+ tokens = []
+ token_to_orig_map = {}
+ token_is_max_context = {}
+ segment_ids = []
+ tokens.append("[CLS]")
+ segment_ids.append(0)
+ for token in query_tokens:
+ tokens.append(token)
+ segment_ids.append(0)
+ tokens.append("[SEP]")
+ segment_ids.append(0)
+
+ for i in range(doc_span.length):
+ split_token_index = doc_span.start + i
+ token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]
+
+ is_max_context = _check_is_max_context(doc_spans, doc_span_index,
+ split_token_index)
+ token_is_max_context[len(tokens)] = is_max_context
+ tokens.append(all_doc_tokens[split_token_index])
+ segment_ids.append(1)
+ tokens.append("[SEP]")
+ segment_ids.append(1)
+
+ input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+ # The mask has 1 for real tokens and 0 for padding tokens. Only real
+ # tokens are attended to.
+ input_mask = [1] * len(input_ids)
+
+ # Zero-pad up to the sequence length.
+ while len(input_ids) < max_seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ segment_ids.append(0)
+
+ assert len(input_ids) == max_seq_length
+ assert len(input_mask) == max_seq_length
+ assert len(segment_ids) == max_seq_length
+
+ start_position = None
+ end_position = None
+ if is_training and not example.is_impossible:
+ # For training, if our document chunk does not contain an annotation
+ # we throw it out, since there is nothing to predict.
+ doc_start = doc_span.start
+ doc_end = doc_span.start + doc_span.length - 1
+ out_of_span = False
+ if not (tok_start_position >= doc_start and
+ tok_end_position <= doc_end):
+ out_of_span = True
+ if out_of_span:
+ start_position = 0
+ end_position = 0
+ else:
+ doc_offset = len(query_tokens) + 2
+ start_position = tok_start_position - doc_start + doc_offset
+ end_position = tok_end_position - doc_start + doc_offset
+
+ if is_training and example.is_impossible:
+ start_position = 0
+ end_position = 0
+
+ if example_index < 20:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("unique_id: %s" % (unique_id))
+ tf.logging.info("example_index: %s" % (example_index))
+ tf.logging.info("doc_span_index: %s" % (doc_span_index))
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in tokens]))
+ tf.logging.info("token_to_orig_map: %s" % " ".join(
+ ["%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)]))
+ tf.logging.info("token_is_max_context: %s" % " ".join([
+ "%d:%s" % (x, y) for (x, y) in six.iteritems(token_is_max_context)
+ ]))
+ tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ tf.logging.info(
+ "input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ tf.logging.info(
+ "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+ if is_training and example.is_impossible:
+ tf.logging.info("impossible example")
+ if is_training and not example.is_impossible:
+ answer_text = " ".join(tokens[start_position:(end_position + 1)])
+ tf.logging.info("start_position: %d" % (start_position))
+ tf.logging.info("end_position: %d" % (end_position))
+ tf.logging.info(
+ "answer: %s" % (tokenization.printable_text(answer_text)))
+
+ feature = InputFeatures(
+ unique_id=unique_id,
+ example_index=example_index,
+ doc_span_index=doc_span_index,
+ tokens=tokens,
+ token_to_orig_map=token_to_orig_map,
+ token_is_max_context=token_is_max_context,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=example.is_impossible)
+
+ # Run callback
+ output_fn(feature)
+
+ unique_id += 1
+
+
+def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
+ orig_answer_text):
+ """Returns tokenized answer spans that better match the annotated answer."""
+
+ # The SQuAD annotations are character based. We first project them to
+ # whitespace-tokenized words. But then after WordPiece tokenization, we can
+ # often find a "better match". For example:
+ #
+ # Question: What year was John Smith born?
+ # Context: The leader was John Smith (1895-1943).
+ # Answer: 1895
+ #
+ # The original whitespace-tokenized answer will be "(1895-1943).". However
+ # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
+ # the exact answer, 1895.
+ #
+ # However, this is not always possible. Consider the following:
+ #
+ # Question: What country is the top exporter of electronics?
+ # Context: The Japanese electronics industry is the largest in the world.
+ # Answer: Japan
+ #
+ # In this case, the annotator chose "Japan" as a character sub-span of
+ # the word "Japanese". Since our WordPiece tokenizer does not split
+ # "Japanese", we just use "Japanese" as the annotation. This is fairly rare
+ # in SQuAD, but does happen.
+ tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text))
+
+ for new_start in range(input_start, input_end + 1):
+ for new_end in range(input_end, new_start - 1, -1):
+ text_span = " ".join(doc_tokens[new_start:(new_end + 1)])
+ if text_span == tok_answer_text:
+ return (new_start, new_end)
+
+ return (input_start, input_end)
+
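+# Worked example for _improve_answer_span (mirroring the comment above): with
+# doc_tokens=["(", "1895", "-", "1943", ")", "."], input_start=0, input_end=5,
+# and a tokenizer for which tokenize("1895") == ["1895"], tok_answer_text is
+# "1895" and the nested search returns (1, 1), tightening the span to just the
+# year token.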
+
+def _check_is_max_context(doc_spans, cur_span_index, position):
+ """Check if this is the 'max context' doc span for the token."""
+
+ # Because of the sliding window approach taken to scoring documents, a single
+ # token can appear in multiple document spans. E.g.
+ # Doc: the man went to the store and bought a gallon of milk
+ # Span A: the man went to the
+ # Span B: to the store and bought
+ # Span C: and bought a gallon of
+ # ...
+ #
+ # Now the word 'bought' will have two scores from spans B and C. We only
+ # want to consider the score with "maximum context", which we define as
+ # the *minimum* of its left and right context (the *sum* of left and
+ # right context will always be the same, of course).
+ #
+ # In the example the maximum context for 'bought' would be span C since
+ # it has 1 left context and 3 right context, while span B has 4 left context
+ # and 0 right context.
+ best_score = None
+ best_span_index = None
+ for (span_index, doc_span) in enumerate(doc_spans):
+ end = doc_span.start + doc_span.length - 1
+ if position < doc_span.start:
+ continue
+ if position > end:
+ continue
+ num_left_context = position - doc_span.start
+ num_right_context = end - position
+ score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
+ if best_score is None or score > best_score:
+ best_score = score
+ best_span_index = span_index
+
+ return cur_span_index == best_span_index
+
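+# Worked numeric example for the scoring above: for "bought" in span B
+# ("to the store and bought", length 5) the score is min(4, 0) + 0.01 * 5 =
+# 0.05, while in span C ("and bought a gallon of", length 5) it is
+# min(1, 3) + 0.01 * 5 = 1.05, so span C is the max context.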
+
+def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
+ use_one_hot_embeddings):
+ """Creates a classification model."""
+ model = modeling.BertModel(
+ config=bert_config,
+ is_training=is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=segment_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ final_hidden = model.get_sequence_output()
+
+ final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
+ batch_size = final_hidden_shape[0]
+ seq_length = final_hidden_shape[1]
+ hidden_size = final_hidden_shape[2]
+
+ output_weights = tf.get_variable(
+ "cls/squad/output_weights", [2, hidden_size],
+ initializer=tf.truncated_normal_initializer(stddev=0.02))
+
+ output_bias = tf.get_variable(
+ "cls/squad/output_bias", [2], initializer=tf.zeros_initializer())
+
+ final_hidden_matrix = tf.reshape(final_hidden,
+ [batch_size * seq_length, hidden_size])
+ logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+
+ logits = tf.reshape(logits, [batch_size, seq_length, 2])
+ logits = tf.transpose(logits, [2, 0, 1])
+
+ unstacked_logits = tf.unstack(logits, axis=0)
+
+ (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])
+
+ return (start_logits, end_logits)
+
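+# Shape walk-through for create_model: final_hidden is [batch, seq_len, hidden];
+# it is flattened to [batch * seq_len, hidden], projected to two logits per
+# token, reshaped to [batch, seq_len, 2], transposed to [2, batch, seq_len] and
+# unstacked, so start_logits and end_logits are each [batch, seq_len].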
+
+def model_fn_builder(bert_config, init_checkpoint, learning_rate,
+ num_train_steps, num_warmup_steps, use_tpu,
+ use_one_hot_embeddings):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ tf.logging.info("*** Features ***")
+ for name in sorted(features.keys()):
+ tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+
+ unique_ids = features["unique_ids"]
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ segment_ids = features["segment_ids"]
+
+ is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+
+ (start_logits, end_logits) = create_model(
+ bert_config=bert_config,
+ is_training=is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ tvars = tf.trainable_variables()
+
+ initialized_variable_names = {}
+ scaffold_fn = None
+ if init_checkpoint:
+ (assignment_map, initialized_variable_names
+ ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
+ if use_tpu:
+
+ def tpu_scaffold():
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+ return tf.train.Scaffold()
+
+ scaffold_fn = tpu_scaffold
+ else:
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+
+ tf.logging.info("**** Trainable Variables ****")
+ for var in tvars:
+ init_string = ""
+ if var.name in initialized_variable_names:
+ init_string = ", *INIT_FROM_CKPT*"
+ tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+ init_string)
+
+ output_spec = None
+ if mode == tf.estimator.ModeKeys.TRAIN:
+ seq_length = modeling.get_shape_list(input_ids)[1]
+
+ def compute_loss(logits, positions):
+ one_hot_positions = tf.one_hot(
+ positions, depth=seq_length, dtype=tf.float32)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+ loss = -tf.reduce_mean(
+ tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
+ return loss
+
+ start_positions = features["start_positions"]
+ end_positions = features["end_positions"]
+
+ start_loss = compute_loss(start_logits, start_positions)
+ end_loss = compute_loss(end_logits, end_positions)
+
+ total_loss = (start_loss + end_loss) / 2.0
+
+ train_op = optimization.create_optimizer(
+ total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
+
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ train_op=train_op,
+ scaffold_fn=scaffold_fn)
+ elif mode == tf.estimator.ModeKeys.PREDICT:
+ predictions = {
+ "unique_ids": unique_ids,
+ "start_logits": start_logits,
+ "end_logits": end_logits,
+ }
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
+ else:
+ raise ValueError(
+ "Only TRAIN and PREDICT modes are supported: %s" % (mode))
+
+ return output_spec
+
+ return model_fn
+
+
+def input_fn_builder(input_file, seq_length, is_training, drop_remainder):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ name_to_features = {
+ "unique_ids": tf.FixedLenFeature([], tf.int64),
+ "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
+ "input_mask": tf.FixedLenFeature([seq_length], tf.int64),
+ "segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
+ }
+
+ if is_training:
+ name_to_features["start_positions"] = tf.FixedLenFeature([], tf.int64)
+ name_to_features["end_positions"] = tf.FixedLenFeature([], tf.int64)
+
+ def _decode_record(record, name_to_features):
+ """Decodes a record to a TensorFlow example."""
+ example = tf.parse_single_example(record, name_to_features)
+
+ # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
+ # So cast all int64 to int32.
+ for name in list(example.keys()):
+ t = example[name]
+ if t.dtype == tf.int64:
+ t = tf.to_int32(t)
+ example[name] = t
+
+ return example
+
+ def input_fn(params):
+ """The actual input function."""
+ batch_size = params["batch_size"]
+
+ # For training, we want a lot of parallel reading and shuffling.
+ # For eval, we want no shuffling and parallel reading doesn't matter.
+ d = tf.data.TFRecordDataset(input_file)
+ if is_training:
+ d = d.repeat()
+ d = d.shuffle(buffer_size=100)
+
+ d = d.apply(
+ tf.contrib.data.map_and_batch(
+ lambda record: _decode_record(record, name_to_features),
+ batch_size=batch_size,
+ drop_remainder=drop_remainder))
+
+ return d
+
+ return input_fn
+
+
+RawResult = collections.namedtuple("RawResult",
+ ["unique_id", "start_logits", "end_logits"])
+
+
+def write_predictions(all_examples, all_features, all_results, n_best_size,
+ max_answer_length, do_lower_case, output_prediction_file,
+ output_nbest_file, output_null_log_odds_file):
+ """Write final predictions to the json file and log-odds of null if needed."""
+ tf.logging.info("Writing predictions to: %s" % (output_prediction_file))
+ tf.logging.info("Writing nbest to: %s" % (output_nbest_file))
+
+ example_index_to_features = collections.defaultdict(list)
+ for feature in all_features:
+ example_index_to_features[feature.example_index].append(feature)
+
+ unique_id_to_result = {}
+ for result in all_results:
+ unique_id_to_result[result.unique_id] = result
+
+ _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
+ "PrelimPrediction",
+ ["feature_index", "start_index", "end_index", "start_logit", "end_logit"])
+
+ all_predictions = collections.OrderedDict()
+ all_nbest_json = collections.OrderedDict()
+ scores_diff_json = collections.OrderedDict()
+
+ for (example_index, example) in enumerate(all_examples):
+ features = example_index_to_features[example_index]
+
+ prelim_predictions = []
+ # keep track of the minimum score of null start+end of position 0
+ score_null = 1000000 # large and positive
+ min_null_feature_index = 0 # the paragraph slice with min null score
+ null_start_logit = 0 # the start logit at the slice with min null score
+ null_end_logit = 0 # the end logit at the slice with min null score
+ for (feature_index, feature) in enumerate(features):
+ result = unique_id_to_result[feature.unique_id]
+ start_indexes = _get_best_indexes(result.start_logits, n_best_size)
+ end_indexes = _get_best_indexes(result.end_logits, n_best_size)
+ # if we could have irrelevant answers, get the min score of irrelevant
+ if FLAGS.version_2_with_negative:
+ feature_null_score = result.start_logits[0] + result.end_logits[0]
+ if feature_null_score < score_null:
+ score_null = feature_null_score
+ min_null_feature_index = feature_index
+ null_start_logit = result.start_logits[0]
+ null_end_logit = result.end_logits[0]
+ for start_index in start_indexes:
+ for end_index in end_indexes:
+ # We could hypothetically create invalid predictions, e.g., predict
+ # that the start of the span is in the question. We throw out all
+ # invalid predictions.
+ if start_index >= len(feature.tokens):
+ continue
+ if end_index >= len(feature.tokens):
+ continue
+ if start_index not in feature.token_to_orig_map:
+ continue
+ if end_index not in feature.token_to_orig_map:
+ continue
+ if not feature.token_is_max_context.get(start_index, False):
+ continue
+ if end_index < start_index:
+ continue
+ length = end_index - start_index + 1
+ if length > max_answer_length:
+ continue
+ prelim_predictions.append(
+ _PrelimPrediction(
+ feature_index=feature_index,
+ start_index=start_index,
+ end_index=end_index,
+ start_logit=result.start_logits[start_index],
+ end_logit=result.end_logits[end_index]))
+
+ if FLAGS.version_2_with_negative:
+ prelim_predictions.append(
+ _PrelimPrediction(
+ feature_index=min_null_feature_index,
+ start_index=0,
+ end_index=0,
+ start_logit=null_start_logit,
+ end_logit=null_end_logit))
+ prelim_predictions = sorted(
+ prelim_predictions,
+ key=lambda x: (x.start_logit + x.end_logit),
+ reverse=True)
+
+ _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name
+ "NbestPrediction", ["text", "start_logit", "end_logit"])
+
+ seen_predictions = {}
+ nbest = []
+ for pred in prelim_predictions:
+ if len(nbest) >= n_best_size:
+ break
+ feature = features[pred.feature_index]
+ if pred.start_index > 0: # this is a non-null prediction
+ tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
+ orig_doc_start = feature.token_to_orig_map[pred.start_index]
+ orig_doc_end = feature.token_to_orig_map[pred.end_index]
+ orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
+ tok_text = " ".join(tok_tokens)
+
+ # De-tokenize WordPieces that have been split off.
+ tok_text = tok_text.replace(" ##", "")
+ tok_text = tok_text.replace("##", "")
+
+ # Clean whitespace
+ tok_text = tok_text.strip()
+ tok_text = " ".join(tok_text.split())
+ orig_text = " ".join(orig_tokens)
+
+ final_text = get_final_text(tok_text, orig_text, do_lower_case)
+ if final_text in seen_predictions:
+ continue
+
+ seen_predictions[final_text] = True
+ else:
+ final_text = ""
+ seen_predictions[final_text] = True
+
+ nbest.append(
+ _NbestPrediction(
+ text=final_text,
+ start_logit=pred.start_logit,
+ end_logit=pred.end_logit))
+
+ # if we didn't include the empty option in the n-best, include it
+ if FLAGS.version_2_with_negative:
+ if "" not in seen_predictions:
+ nbest.append(
+ _NbestPrediction(
+ text="", start_logit=null_start_logit,
+ end_logit=null_end_logit))
+ # In very rare edge cases we could have no valid predictions. So we
+ # just create a nonce prediction in this case to avoid failure.
+ if not nbest:
+ nbest.append(
+ _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
+
+ assert len(nbest) >= 1
+
+ total_scores = []
+ best_non_null_entry = None
+ for entry in nbest:
+ total_scores.append(entry.start_logit + entry.end_logit)
+ if not best_non_null_entry:
+ if entry.text:
+ best_non_null_entry = entry
+
+ probs = _compute_softmax(total_scores)
+
+ nbest_json = []
+ for (i, entry) in enumerate(nbest):
+ output = collections.OrderedDict()
+ output["text"] = entry.text
+ output["probability"] = probs[i]
+ output["start_logit"] = entry.start_logit
+ output["end_logit"] = entry.end_logit
+ nbest_json.append(output)
+
+ assert len(nbest_json) >= 1
+
+ if not FLAGS.version_2_with_negative:
+ all_predictions[example.qas_id] = nbest_json[0]["text"]
+ else:
+ # predict "" iff the null score - the score of best non-null > threshold
+ score_diff = score_null - best_non_null_entry.start_logit - (
+ best_non_null_entry.end_logit)
+ scores_diff_json[example.qas_id] = score_diff
+ if score_diff > FLAGS.null_score_diff_threshold:
+ all_predictions[example.qas_id] = ""
+ else:
+ all_predictions[example.qas_id] = best_non_null_entry.text
+
+ all_nbest_json[example.qas_id] = nbest_json
+
+ with tf.gfile.GFile(output_prediction_file, "w") as writer:
+ writer.write(json.dumps(all_predictions, indent=4) + "\n")
+
+ with tf.gfile.GFile(output_nbest_file, "w") as writer:
+ writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
+
+ if FLAGS.version_2_with_negative:
+ with tf.gfile.GFile(output_null_log_odds_file, "w") as writer:
+ writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
+
+
+def get_final_text(pred_text, orig_text, do_lower_case):
+ """Project the tokenized prediction back to the original text."""
+
+ # When we created the data, we kept track of the alignment between original
+ # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So
+ # now `orig_text` contains the span of our original text corresponding to the
+ # span that we predicted.
+ #
+ # However, `orig_text` may contain extra characters that we don't want in
+ # our prediction.
+ #
+ # For example, let's say:
+ # pred_text = steve smith
+ # orig_text = Steve Smith's
+ #
+ # We don't want to return `orig_text` because it contains the extra "'s".
+ #
+ # We don't want to return `pred_text` because it's already been normalized
+ # (the SQuAD eval script also does punctuation stripping/lower casing but
+ # our tokenizer does additional normalization like stripping accent
+ # characters).
+ #
+ # What we really want to return is "Steve Smith".
+ #
+ # Therefore, we have to apply a semi-complicated alignment heuristic between
+ # `pred_text` and `orig_text` to get a character-to-character alignment. This
+ # can fail in certain cases in which case we just return `orig_text`.
+
+ def _strip_spaces(text):
+ ns_chars = []
+ ns_to_s_map = collections.OrderedDict()
+ for (i, c) in enumerate(text):
+ if c == " ":
+ continue
+ ns_to_s_map[len(ns_chars)] = i
+ ns_chars.append(c)
+ ns_text = "".join(ns_chars)
+ return (ns_text, ns_to_s_map)
+
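+ # Worked example for _strip_spaces: for text "a b  c" it returns
+ # ("abc", {0: 0, 1: 2, 2: 5}), i.e. each index in the stripped string maps
+ # back to its position in the original string.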
+ # We first tokenize `orig_text`, strip whitespace from the result
+ # and `pred_text`, and check if they are the same length. If they are
+ # NOT the same length, the heuristic has failed. If they are the same
+ # length, we assume the characters are one-to-one aligned.
+ tokenizer = tokenization.BasicTokenizer(do_lower_case=do_lower_case)
+
+ tok_text = " ".join(tokenizer.tokenize(orig_text))
+
+ start_position = tok_text.find(pred_text)
+ if start_position == -1:
+ if FLAGS.verbose_logging:
+ tf.logging.info(
+ "Unable to find text: '%s' in '%s'" % (pred_text, orig_text))
+ return orig_text
+ end_position = start_position + len(pred_text) - 1
+
+ (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text)
+ (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text)
+
+ if len(orig_ns_text) != len(tok_ns_text):
+ if FLAGS.verbose_logging:
+ tf.logging.info("Length not equal after stripping spaces: '%s' vs '%s'",
+ orig_ns_text, tok_ns_text)
+ return orig_text
+
+ # We then project the characters in `pred_text` back to `orig_text` using
+ # the character-to-character alignment.
+ tok_s_to_ns_map = {}
+ for (i, tok_index) in six.iteritems(tok_ns_to_s_map):
+ tok_s_to_ns_map[tok_index] = i
+
+ orig_start_position = None
+ if start_position in tok_s_to_ns_map:
+ ns_start_position = tok_s_to_ns_map[start_position]
+ if ns_start_position in orig_ns_to_s_map:
+ orig_start_position = orig_ns_to_s_map[ns_start_position]
+
+ if orig_start_position is None:
+ if FLAGS.verbose_logging:
+ tf.logging.info("Couldn't map start position")
+ return orig_text
+
+ orig_end_position = None
+ if end_position in tok_s_to_ns_map:
+ ns_end_position = tok_s_to_ns_map[end_position]
+ if ns_end_position in orig_ns_to_s_map:
+ orig_end_position = orig_ns_to_s_map[ns_end_position]
+
+ if orig_end_position is None:
+ if FLAGS.verbose_logging:
+ tf.logging.info("Couldn't map end position")
+ return orig_text
+
+ output_text = orig_text[orig_start_position:(orig_end_position + 1)]
+ return output_text
+
+
+def _get_best_indexes(logits, n_best_size):
+ """Get the n-best logits from a list."""
+ index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)
+
+ best_indexes = []
+ for i in range(len(index_and_score)):
+ if i >= n_best_size:
+ break
+ best_indexes.append(index_and_score[i][0])
+ return best_indexes
+
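+# Worked example for _get_best_indexes: logits=[0.1, 2.0, 1.5, -3.0] with
+# n_best_size=2 sorts to [(1, 2.0), (2, 1.5), (0, 0.1), (3, -3.0)] and
+# returns [1, 2].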
+
+def _compute_softmax(scores):
+ """Compute softmax probability over raw logits."""
+ if not scores:
+ return []
+
+ max_score = None
+ for score in scores:
+ if max_score is None or score > max_score:
+ max_score = score
+
+ exp_scores = []
+ total_sum = 0.0
+ for score in scores:
+ x = math.exp(score - max_score)
+ exp_scores.append(x)
+ total_sum += x
+
+ probs = []
+ for score in exp_scores:
+ probs.append(score / total_sum)
+ return probs
+
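+# Worked example for _compute_softmax: scores=[1.0, 2.0] gives max_score=2.0,
+# exp_scores ~= [0.368, 1.0], total_sum ~= 1.368 and probs ~= [0.269, 0.731].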
+
+class FeatureWriter(object):
+ """Writes InputFeature to TF example file."""
+
+ def __init__(self, filename, is_training):
+ self.filename = filename
+ self.is_training = is_training
+ self.num_features = 0
+ self._writer = tf.python_io.TFRecordWriter(filename)
+
+ def process_feature(self, feature):
+ """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
+ self.num_features += 1
+
+ def create_int_feature(values):
+ feature = tf.train.Feature(
+ int64_list=tf.train.Int64List(value=list(values)))
+ return feature
+
+ features = collections.OrderedDict()
+ features["unique_ids"] = create_int_feature([feature.unique_id])
+ features["input_ids"] = create_int_feature(feature.input_ids)
+ features["input_mask"] = create_int_feature(feature.input_mask)
+ features["segment_ids"] = create_int_feature(feature.segment_ids)
+
+ if self.is_training:
+ features["start_positions"] = create_int_feature([feature.start_position])
+ features["end_positions"] = create_int_feature([feature.end_position])
+ impossible = 0
+ if feature.is_impossible:
+ impossible = 1
+ features["is_impossible"] = create_int_feature([impossible])
+
+ tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+ self._writer.write(tf_example.SerializeToString())
+
+ def close(self):
+ self._writer.close()
+
+
+def validate_flags_or_throw(bert_config):
+ """Validate the input FLAGS or throw an exception."""
+ tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
+ FLAGS.init_checkpoint)
+
+ if not FLAGS.do_train and not FLAGS.do_predict:
+ raise ValueError("At least one of `do_train` or `do_predict` must be True.")
+
+ if FLAGS.do_train:
+ if not FLAGS.train_file:
+ raise ValueError(
+ "If `do_train` is True, then `train_file` must be specified.")
+ if FLAGS.do_predict:
+ if not FLAGS.predict_file:
+ raise ValueError(
+ "If `do_predict` is True, then `predict_file` must be specified.")
+
+ if FLAGS.max_seq_length > bert_config.max_position_embeddings:
+ raise ValueError(
+ "Cannot use sequence length %d because the BERT model "
+ "was only trained up to sequence length %d" %
+ (FLAGS.max_seq_length, bert_config.max_position_embeddings))
+
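+  # The "+ 3" reserves room for the [CLS] token and the two [SEP] tokens that
+  # surround the query and the document.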
+ if FLAGS.max_seq_length <= FLAGS.max_query_length + 3:
+ raise ValueError(
+ "The max_seq_length (%d) must be greater than max_query_length "
+ "(%d) + 3" % (FLAGS.max_seq_length, FLAGS.max_query_length))
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
+
+ validate_flags_or_throw(bert_config)
+
+ tf.gfile.MakeDirs(FLAGS.output_dir)
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+ tpu_cluster_resolver = None
+ if FLAGS.use_tpu and FLAGS.tpu_name:
+ tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
+ FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ run_config = tf.contrib.tpu.RunConfig(
+ cluster=tpu_cluster_resolver,
+ master=FLAGS.master,
+ model_dir=FLAGS.output_dir,
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps,
+ tpu_config=tf.contrib.tpu.TPUConfig(
+ iterations_per_loop=FLAGS.iterations_per_loop,
+ num_shards=FLAGS.num_tpu_cores,
+ per_host_input_for_training=is_per_host))
+
+ train_examples = None
+ num_train_steps = None
+ num_warmup_steps = None
+ if FLAGS.do_train:
+ train_examples = read_squad_examples(
+ input_file=FLAGS.train_file, is_training=True)
+ num_train_steps = int(
+ len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
+ num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
+
+ # Pre-shuffle the input to avoid having to make a very large shuffle
+    # buffer in the `input_fn`.
+ rng = random.Random(12345)
+ rng.shuffle(train_examples)
+
+ model_fn = model_fn_builder(
+ bert_config=bert_config,
+ init_checkpoint=FLAGS.init_checkpoint,
+ learning_rate=FLAGS.learning_rate,
+ num_train_steps=num_train_steps,
+ num_warmup_steps=num_warmup_steps,
+ use_tpu=FLAGS.use_tpu,
+ use_one_hot_embeddings=FLAGS.use_tpu)
+
+ # If TPU is not available, this will fall back to normal Estimator on CPU
+ # or GPU.
+ estimator = tf.contrib.tpu.TPUEstimator(
+ use_tpu=FLAGS.use_tpu,
+ model_fn=model_fn,
+ config=run_config,
+ train_batch_size=FLAGS.train_batch_size,
+ predict_batch_size=FLAGS.predict_batch_size)
+
+ if FLAGS.do_train:
+ # We write to a temporary file to avoid storing very large constant tensors
+ # in memory.
+ train_writer = FeatureWriter(
+ filename=os.path.join(FLAGS.output_dir, "train.tf_record"),
+ is_training=True)
+ convert_examples_to_features(
+ examples=train_examples,
+ tokenizer=tokenizer,
+ max_seq_length=FLAGS.max_seq_length,
+ doc_stride=FLAGS.doc_stride,
+ max_query_length=FLAGS.max_query_length,
+ is_training=True,
+ output_fn=train_writer.process_feature)
+ train_writer.close()
+
+ tf.logging.info("***** Running training *****")
+ tf.logging.info(" Num orig examples = %d", len(train_examples))
+ tf.logging.info(" Num split examples = %d", train_writer.num_features)
+ tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
+ tf.logging.info(" Num steps = %d", num_train_steps)
+ del train_examples
+
+ train_input_fn = input_fn_builder(
+ input_file=train_writer.filename,
+ seq_length=FLAGS.max_seq_length,
+ is_training=True,
+ drop_remainder=True)
+ estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
+
+ if FLAGS.do_predict:
+ eval_examples = read_squad_examples(
+ input_file=FLAGS.predict_file, is_training=False)
+
+ eval_writer = FeatureWriter(
+ filename=os.path.join(FLAGS.output_dir, "eval.tf_record"),
+ is_training=False)
+ eval_features = []
+
+ def append_feature(feature):
+ eval_features.append(feature)
+ eval_writer.process_feature(feature)
+
+ convert_examples_to_features(
+ examples=eval_examples,
+ tokenizer=tokenizer,
+ max_seq_length=FLAGS.max_seq_length,
+ doc_stride=FLAGS.doc_stride,
+ max_query_length=FLAGS.max_query_length,
+ is_training=False,
+ output_fn=append_feature)
+ eval_writer.close()
+
+ tf.logging.info("***** Running predictions *****")
+ tf.logging.info(" Num orig examples = %d", len(eval_examples))
+ tf.logging.info(" Num split examples = %d", len(eval_features))
+ tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)
+
+ all_results = []
+
+ predict_input_fn = input_fn_builder(
+ input_file=eval_writer.filename,
+ seq_length=FLAGS.max_seq_length,
+ is_training=False,
+ drop_remainder=False)
+
+ # If running eval on the TPU, you will need to specify the number of
+ # steps.
+ all_results = []
+ for result in estimator.predict(
+ predict_input_fn, yield_single_examples=True):
+ if len(all_results) % 1000 == 0:
+ tf.logging.info("Processing example: %d" % (len(all_results)))
+ unique_id = int(result["unique_ids"])
+ start_logits = [float(x) for x in result["start_logits"].flat]
+ end_logits = [float(x) for x in result["end_logits"].flat]
+ all_results.append(
+ RawResult(
+ unique_id=unique_id,
+ start_logits=start_logits,
+ end_logits=end_logits))
+
+ output_prediction_file = os.path.join(FLAGS.output_dir, "predictions.json")
+ output_nbest_file = os.path.join(FLAGS.output_dir, "nbest_predictions.json")
+ output_null_log_odds_file = os.path.join(FLAGS.output_dir, "null_odds.json")
+
+ write_predictions(eval_examples, eval_features, all_results,
+ FLAGS.n_best_size, FLAGS.max_answer_length,
+ FLAGS.do_lower_case, output_prediction_file,
+ output_nbest_file, output_null_log_odds_file)
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("vocab_file")
+ flags.mark_flag_as_required("bert_config_file")
+ flags.mark_flag_as_required("output_dir")
+ tf.app.run()
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..dd276f5
--- /dev/null
+++ b/server.py
@@ -0,0 +1,417 @@
+import os
+import shutil
+
+import requests
+import datetime
+import time
+import hashlib
+import sqlite3
+import pandas
+import threading
+import logging as log
+
+server_url = "http://39.100.94.111:8083"
+openid = "gpu-server-test1"
+password = "1e327b070ab43fd071768a4d474f016adbbf3ea475577fe66a505d9e33b24f2f"
+token = None
+# Client code
+client_code = "dc9fbb4f4f0b84fa903058991af60e73556494af8a02ef69fb6a93217729f04b"
+# Passport (CSP) identity code
+idcode = None
+# Timestamp
+timestamp = ""
+# Maximum number of sentences handled per batch
+max_stn_num = 20000
+# Id of the BPT (batch processing task) currently being handled
+bpt_id = 0
+# Sentences (STNs) of the current BPT
+stn_list = []
+# Table that stores the input data for prediction
+predict_table = "predict_data"
+# Directory where the model writes its prediction results
+result_out_dir = "./tmp/eppredict"
+# Initialization flag
+base_init = False
+
+log.basicConfig(filename=None, format="%(asctime)s %(levelname)s [%(funcName)s] : %(message)s", level=log.INFO)
+
+
+def get_timestamp():
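+    # Current Unix time in milliseconds (second precision), as a string; sent
+    # with every request (presumably for freshness checks on the server side).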
+ return str(int(time.mktime(datetime.datetime.now().timetuple())) * 1000)
+
+
+base_headers = {"timestamp": get_timestamp(), "X-Requested-With": ""}
+token_headers = {"timestamp": get_timestamp(), "X-Requested-With": "", "signed": "", "openid": openid}
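+# base_headers is used for unauthenticated calls; token_headers additionally
+# carries the openid and a per-request signature (see calculate_signed below).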
+
+
+# Build the full request URL from a relative path
+def url_parser(url):
+ return server_url + "/" + url
+
+
+# Compute the random code used in the signature
+def calculate_random_code():
+ return hashlib.sha1("RandomCode [{0}][{1}][{2}]".format(openid, get_timestamp(), client_code).encode("utf-8")) \
+ .hexdigest()
+
+
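+# Signing scheme used by the two helpers: the random code is the SHA-1 of
+# "RandomCode [openid][timestamp][client_code]", and the request signature is
+# the SHA-1 of "SIGN [openid][random_code][token]".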
+# Compute the client signature
+def calculate_signed():
+ return hashlib.sha1("SIGN [{0}][{1}][{2}]".format(openid, calculate_random_code(), token).encode("utf-8")) \
+ .hexdigest()
+
+
+# Check whether the user exists
+def user_checker():
+ log.info("Check User Existence: openid" + str(openid))
+ checker_param = {"openid": openid}
+ base_headers["timestamp"] = get_timestamp()
+ res = requests.get(url=url_parser("user"), headers=base_headers, params=checker_param)
+ if res.status_code == 404:
+ log.warning("User Not Exist: openid" + str(openid))
+ return False
+ else:
+ log.info("User Exist: openid " + str(openid))
+ return True
+
+
+# Register the user
+def user_register():
+ if not user_checker():
+ log.info("Try Creating New User: openid " + str(openid))
+ register_json = {"openid": openid, "password": password}
+ register_param = {"clientCode": client_code}
+ base_headers["timestamp"] = get_timestamp()
+ res = requests.post(url=url_parser("user/cs"), headers=base_headers, json=register_json, params=register_param)
+ respond_json = res.json()
+        if res.status_code == 201 and respond_json["openid"] == openid:
+            log.info("User Creation Success: openid " + str(openid))
+            return True
+        else:
+            log.error("User Creation Failed: openid " + str(openid))
+            return False
+
+
+# Obtain a token
+def get_token():
+ if user_checker():
+ log.info("Try Getting New Token")
+ login_json = {"openid": openid, "password": password, "clientCode": client_code}
+ res = requests.post(url=url_parser("user/login"), headers=base_headers, json=login_json)
+ respond_json = res.json()
+ if res.status_code == 200 and respond_json["info"] == "Authentication Success":
+ global token
+ token = respond_json["data"]["token"]
+ log.info("Succeed In Getting New Token" + str(token))
+ else:
+ if base_init is True:
+ user_register()
+ log.error("Fail To Get New Token")
+
+
+# Obtain a child-server passport (CSP)
+def get_csp():
+ global idcode
+ if token is not None:
+ log.info("Try Getting New CSP")
+        # Compute the client signature
+ token_headers["signed"] = calculate_signed()
+ token_headers["timestamp"] = get_timestamp()
+ res = requests.post(url=url_parser("cs"), headers=token_headers)
+ respond_json = res.json()
+ log.debug(respond_json)
+        # Normal response
+ if res.status_code == 200:
+            # Handle an unauthorized response reported in the body
+ try:
+ idcode = respond_json["identityCode"]
+ log.info("Succeed In Getting CSP: idcode " + str(idcode))
+ except KeyError:
+ if respond_json["status"] == 401:
+ log.warning("Token OUT OF DATE: token " + str(token))
+ get_token()
+ return
+
+        # Unauthorized response
+ elif res.status_code == 401:
+            # Re-acquire the token
+ log.warning("Token Maybe OUT OF DATE: token " + str(token))
+ log.info("Try to Get New Token")
+ get_token()
+ else:
+ log.error("Failed to get New CSP")
+ else:
+ get_token()
+
+
+# Renew the passport (CSP)
+def update_csp():
+ if idcode is not None:
+ token_headers["signed"] = calculate_signed()
+ token_headers["timestamp"] = get_timestamp()
+ res = requests.put(url=url_parser("cs"), headers=token_headers, params={"idcode": idcode})
+ respond_json = res.json()
+ log.debug(respond_json)
+        # Success response
+ if res.status_code == 200 and respond_json["expired"] is False:
+ log.info("Succeed IN Updating CSP: idcode " + str(idcode))
+ log.info("CSP Last Update Time: " + str(respond_json["lastUpdateTime"]))
+ elif res.status_code == 401:
+            # Try to obtain a new token
+ log.warning("Unauthorized Status Code: Try to Get New Token")
+ get_token()
+ else:
+            # Re-acquire the passport
+ log.warning("CSP Maybe OUT OF DATE: idcode " + str(idcode))
+ get_csp()
+
+
+# Give up the current batch processing task
+def giving_up_bpt():
+ global bpt_id
+ global stn_list
+    try_count = 0
+    while try_count < 3:
+ try_count += 1
+        # Mark the task as failed
+ res = requests.put(url=url_parser("cs/bpt"),
+ headers=token_headers,
+ params={"idcode": idcode, "bptId": bpt_id, "status": False},
+ json=[])
+
+ if res.status_code == 201:
+ log.info("Marking Task Failed Successful: bertId ", bpt_id)
+ return True
+ elif res.status_code == 401:
+            # Try to obtain a new token
+ log.warning("Unauthorized Status Code: Try to Get New Token")
+ get_token()
+ else:
+ if try_count >= 3:
+ log.error("Marking Task Failed Eventually Failed: bertId ", bpt_id)
+ log.warning("Connection Maybe Unstable")
+ return False
+ log.warning("Failed and Try: count " + str(try_count))
+
+    # Clear the task data
+ bpt_id = None
+ stn_list = []
+
+
+# Fetch a batch processing task (BPT) from the master server
+def get_bpt_from_server():
+ global max_stn_num
+ global idcode
+ if idcode is not None:
+ log.info("Try Getting BPT From Server...")
+ token_headers["signed"] = calculate_signed()
+ token_headers["timestamp"] = get_timestamp()
+ res = requests.get(url=url_parser("cs/bpt"),
+ headers=token_headers,
+ params={"idcode": idcode, "maxStnNum": int(max_stn_num)})
+ respond_json = res.json()
+        log.debug(respond_json)
+ if res.status_code == 200:
+ global bpt_id
+ try:
+ bpt_id = respond_json["id"]
+ except KeyError:
+ if respond_json["status"] == 401:
+ get_token()
+ return
+
+            # No batch processing task available
+ if bpt_id is None:
+ log.info("No BPT Task For Now")
+ return
+
+ stns = respond_json["stns"]
+ if len(stns) == 0:
+                log.info("STNS Is Empty, Giving Up")
+ giving_up_bpt()
+ return
+
+ log.info("Get BPT Task: bptId " + str(bpt_id))
+ global stn_list
+ stn_list = stns
+ conn = sqlite3.connect(r".\bptdata.db")
+            # Write the sentences into the prediction table
+ cursor = conn.cursor()
+ cursor.execute("DELETE FROM {0}".format(predict_table))
+
+ log.info("Processing Bert Predict Data...")
+ for stn in stns:
+ sql = "INSERT INTO {0} (id, text) values (?, ?)".format(predict_table)
+ cursor.execute(sql, [stn["stnId"], stn["text"]])
+ conn.commit()
+ conn.close()
+ log.info("Finished in Processing Bert Predict Data")
+
+ result = execute_bert_predict()
+
+ if result is True:
+ if processing_bert_result() is True:
+ log.info("BPT Execution Success: bptId " + str(bpt_id))
+ else:
+ log.info("BPT Execution Eventually Failed: bptId " + str(bpt_id))
+ else:
+ log.error("Bert Model Execution Failed")
+
+ log.info("Try Giving Up BPT Task: bptId " + str(bpt_id))
+ giving_up_bpt()
+
+ log.info("Get Status Code: " + str(res.status_code))
+
+            # Clear the task data
+ bpt_id = None
+ stn_list = []
+
+ elif res.status_code == 400:
+ if respond_json["data"]["exception"] == "org.codedream.epaper.exception.badrequest.AuthExpiredException":
+ print("Auth Expired Exception: Try to Get New CSP")
+ get_csp()
+ return
+ else:
+ print("Unknown Exception")
+
+ elif res.status_code == 401:
+            # Try to obtain a new token
+ log.warning("Unauthorized Status Code: Try to Get New Token")
+ get_token()
+ elif res.status_code == 500:
+ log.warning("Remote Server Error: Inner Server Error")
+            log.debug(respond_json)
+ else:
+            # Try to obtain a passport
+ get_csp()
+
+
+# Initialize the database environment
+def sqlite_create_table():
+ conn = sqlite3.connect(r".\bptdata.db")
+ cursor = conn.cursor()
+ create_tb_cmd = "CREATE TABLE IF NOT EXISTS {0}" \
+ "(id INT PRIMARY KEY," \
+ "text INT)".format(predict_table)
+ cursor.execute(create_tb_cmd)
+ cursor.execute("DELETE FROM {0}".format(predict_table))
+ conn.commit()
+ conn.close()
+
+
+# Run the BERT model for prediction
+def execute_bert_predict():
+ if os.path.exists(result_out_dir):
+ shutil.rmtree(result_out_dir)
+ log.info("BERT Model Executing...")
+ os.system("python run_classifier.py "
+ "--task_name=eppdt "
+ "--do_predict=true "
+ "--data_dir=./tmp "
+ "--vocab_file=./chinese_wwm_ext_L-12_H-768_A-12/vocab.txt "
+ "--bert_config_file=./chinese_wwm_ext_L-12_H-768_A-12/bert_config.json "
+ "--init_checkpoint=./tmp/epout/model.ckpt-14062 "
+ "--max_seq_length=64 "
+ "--output_dir=./tmp/eppredict/ > bert_out.log 2>&1")
+ result_list = os.listdir(result_out_dir)
+ log.info("BERT Model Execution Finished.")
+ if "test_results.tsv" not in result_list:
+ return False
+ else:
+ return True
+
+
+# Process the model's prediction results
+def processing_bert_result():
+ result = pandas.read_csv(result_out_dir + '/test_results.tsv', sep='\t', header=None)
+ token_headers["timestamp"] = get_timestamp()
+ token_headers["signed"] = calculate_signed()
+ bpt_result_json = []
+ idx = 0
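+    # Each row of test_results.tsv holds the three class probabilities for one
+    # input sentence; the code assumes rows are in the same order as stn_list.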
+
+ for i, row in result.iterrows():
+ bpt_result_json.append({"stnid": stn_list[idx]["stnId"], "tagPossible": [row[0], row[1], row[2]]})
+ idx += 1
+
+ log.debug("Bert Result Json")
+ log.debug(bpt_result_json)
+ log.info("Processing BERT Model Result Successful")
+
+    # Try up to 3 times
+ try_count = 0
+ while try_count < 3:
+ try_count += 1
+ log.info("Uploading BERT Model Result...")
+ res = requests.put(url=url_parser("cs/bpt"),
+ headers=token_headers,
+ params={"idcode": idcode, "bptId": bpt_id, "status": True},
+ json=bpt_result_json)
+ if res.status_code == 201:
+ log.info("Uploading Successful: bertId " + str(bpt_id))
+ return True
+ elif res.status_code == 401:
+            # Try to obtain a new token
+ log.warning("Unauthorized Status Code: Try to Get New Token")
+ get_token()
+ else:
+ if try_count >= 3:
+ log.error("Uploading Eventually Failed: bertId " + str(bpt_id))
+ log.warning("Connection Maybe Unstable")
+ return False
+ log.warning("Failed and Try: count " + str(try_count))
+
+
+# Timer that periodically renews the CSP on a worker thread
+def update_csp_timer():
+ log.info("UPDATE CSP TIMER STARTED")
+ try:
+ update_csp()
+    except Exception:
+        log.exception("Exception Thrown, Restarting Timer...")
+ finally:
+ t = threading.Timer(60, update_csp_timer)
+ t.start()
+
+
+# Timer that periodically fetches batch processing tasks on a worker thread
+def get_bpt_timer():
+ log.info("GET BPT TIMER STARTED")
+ try:
+ get_bpt_from_server()
+    except Exception:
+        log.exception("Exception Thrown, Restarting Timer...")
+ finally:
+ t = threading.Timer(15, get_bpt_timer)
+ t.start()
+
+
+# Initialization
+def init():
+ global base_init
+ sqlite_create_table()
+ user_register()
+ get_token()
+ get_csp()
+ base_init = True
+
+
+# Start the timers
+def init_timer():
+ update_csp_timer()
+ get_bpt_timer()
+
+
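+# Startup: retry initialization up to three times, then start the CSP-renewal
+# (every 60 s) and BPT-polling (every 15 s) timers and keep the main thread alive.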
+if __name__ == "__main__":
+ try_time = 0
+ while try_time < 3:
+ try:
+ init()
+ try_time = 3
+        except Exception:
+ try_time += 1
+ time.sleep(5)
+
+ init_timer()
+ while True:
+ time.sleep(5)
diff --git a/tmp/epout/checkpoint b/tmp/epout/checkpoint
new file mode 100644
index 0000000..afa776b
--- /dev/null
+++ b/tmp/epout/checkpoint
@@ -0,0 +1,6 @@
+model_checkpoint_path: "model.ckpt-14062"
+all_model_checkpoint_paths: "model.ckpt-11000"
+all_model_checkpoint_paths: "model.ckpt-12000"
+all_model_checkpoint_paths: "model.ckpt-13000"
+all_model_checkpoint_paths: "model.ckpt-14000"
+all_model_checkpoint_paths: "model.ckpt-14062"
diff --git a/tmp/epout/eval.tf_record b/tmp/epout/eval.tf_record
new file mode 100644
index 0000000..32d1791
Binary files /dev/null and b/tmp/epout/eval.tf_record differ
diff --git a/tmp/epout/eval/events.out.tfevents.1586543049.iZ8vbescrakld4m4drzcktZ b/tmp/epout/eval/events.out.tfevents.1586543049.iZ8vbescrakld4m4drzcktZ
new file mode 100644
index 0000000..aed70db
Binary files /dev/null and b/tmp/epout/eval/events.out.tfevents.1586543049.iZ8vbescrakld4m4drzcktZ differ
diff --git a/tmp/epout/eval_results.txt b/tmp/epout/eval_results.txt
new file mode 100644
index 0000000..87aef38
--- /dev/null
+++ b/tmp/epout/eval_results.txt
@@ -0,0 +1,4 @@
+eval_accuracy = 0.98253334
+eval_loss = 0.06590833
+global_step = 14062
+loss = 0.06590833
diff --git a/tmp/epout/events.out.tfevents.1586536204.iZ8vbescrakld4m4drzcktZ b/tmp/epout/events.out.tfevents.1586536204.iZ8vbescrakld4m4drzcktZ
new file mode 100644
index 0000000..9f35bd4
Binary files /dev/null and b/tmp/epout/events.out.tfevents.1586536204.iZ8vbescrakld4m4drzcktZ differ
diff --git a/tmp/epout/graph.pbtxt b/tmp/epout/graph.pbtxt
new file mode 100644
index 0000000..8d0c735
--- /dev/null
+++ b/tmp/epout/graph.pbtxt
@@ -0,0 +1,592992 @@
+node {
+ name: "global_step/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@global_step"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT64
+ tensor_shape {
+ }
+ int64_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "global_step"
+ op: "VarHandleOp"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@global_step"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: "global_step"
+ }
+ }
+}
+node {
+ name: "global_step/IsInitialized/VarIsInitializedOp"
+ op: "VarIsInitializedOp"
+ input: "global_step"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_step/Assign"
+ op: "AssignVariableOp"
+ input: "global_step"
+ input: "global_step/Initializer/zeros"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@global_step"
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+}
+node {
+ name: "global_step/Read/ReadVariableOp"
+ op: "ReadVariableOp"
+ input: "global_step"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@global_step"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+}
+node {
+ name: "global_step/VarIsInitializedOp"
+ op: "VarIsInitializedOp"
+ input: "global_step"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_step/cond/Switch"
+ op: "Switch"
+ input: "global_step/VarIsInitializedOp"
+ input: "global_step/VarIsInitializedOp"
+ attr {
+ key: "T"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_step/cond/switch_t"
+ op: "Identity"
+ input: "global_step/cond/Switch:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_step/cond/switch_f"
+ op: "Identity"
+ input: "global_step/cond/Switch"
+ attr {
+ key: "T"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_step/cond/pred_id"
+ op: "Identity"
+ input: "global_step/VarIsInitializedOp"
+ attr {
+ key: "T"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_step/cond/Read/ReadVariableOp"
+ op: "ReadVariableOp"
+ input: "global_step/cond/Read/ReadVariableOp/Switch:1"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+}
+node {
+ name: "global_step/cond/Read/ReadVariableOp/Switch"
+ op: "Switch"
+ input: "global_step"
+ input: "global_step/cond/pred_id"
+ attr {
+ key: "T"
+ value {
+ type: DT_RESOURCE
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@global_step"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_step/cond/Identity"
+ op: "Identity"
+ input: "global_step/cond/Read/ReadVariableOp"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_step/cond/Switch_1"
+ op: "Switch"
+ input: "global_step/Initializer/zeros"
+ input: "global_step/cond/pred_id"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@global_step"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_step/cond/Merge"
+ op: "Merge"
+ input: "global_step/cond/Switch_1"
+ input: "global_step/cond/Identity"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_step/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT64
+ tensor_shape {
+ }
+ int64_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "global_step/add"
+ op: "Add"
+ input: "global_step/cond/Merge"
+ input: "global_step/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Const"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./tmp/epout/train.tf_record"
+ }
+ }
+ }
+}
+node {
+ name: "flat_filenames/shape"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "flat_filenames"
+ op: "Reshape"
+ input: "Const"
+ input: "flat_filenames/shape"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "TensorSliceDataset"
+ op: "TensorSliceDataset"
+ input: "flat_filenames"
+ device: "/device:CPU:0"
+ attr {
+ key: "Toutput_types"
+ value {
+ list {
+ type: DT_STRING
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "FlatMapDataset"
+ op: "FlatMapDataset"
+ input: "TensorSliceDataset"
+ device: "/device:CPU:0"
+ attr {
+ key: "Targuments"
+ value {
+ list {
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "f"
+ value {
+ func {
+ name: "__inference_Dataset_flat_map_read_one_file_31"
+ }
+ }
+ }
+ attr {
+ key: "output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_types"
+ value {
+ list {
+ type: DT_STRING
+ }
+ }
+ }
+}
+node {
+ name: "count"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT64
+ tensor_shape {
+ }
+ int64_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "RepeatDataset"
+ op: "RepeatDataset"
+ input: "FlatMapDataset"
+ input: "count"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_types"
+ value {
+ list {
+ type: DT_STRING
+ }
+ }
+ }
+}
+node {
+ name: "buffer_size"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT64
+ tensor_shape {
+ }
+ int64_val: 100
+ }
+ }
+ }
+}
+node {
+ name: "seed"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT64
+ tensor_shape {
+ }
+ int64_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "seed2"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT64
+ tensor_shape {
+ }
+ int64_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "ShuffleDataset"
+ op: "ShuffleDataset"
+ input: "RepeatDataset"
+ input: "buffer_size"
+ input: "seed"
+ input: "seed2"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_types"
+ value {
+ list {
+ type: DT_STRING
+ }
+ }
+ }
+ attr {
+ key: "reshuffle_each_iteration"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "batch_size"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT64
+ tensor_shape {
+ }
+ int64_val: 32
+ }
+ }
+ }
+}
+node {
+ name: "num_parallel_calls"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT64
+ tensor_shape {
+ }
+ int64_val: 32
+ }
+ }
+ }
+}
+node {
+ name: "drop_remainder"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_BOOL
+ tensor_shape {
+ }
+ bool_val: true
+ }
+ }
+ }
+}
+node {
+ name: "ExperimentalMapAndBatchDataset"
+ op: "ExperimentalMapAndBatchDataset"
+ input: "ShuffleDataset"
+ input: "batch_size"
+ input: "num_parallel_calls"
+ input: "drop_remainder"
+ device: "/device:CPU:0"
+ attr {
+ key: "Targuments"
+ value {
+ list {
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "f"
+ value {
+ func {
+ name: "__inference_tf_data_experimental_map_and_batch__61"
+ }
+ }
+ }
+ attr {
+ key: "output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_types"
+ value {
+ list {
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ }
+ }
+ }
+ attr {
+ key: "preserve_cardinality"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "optimizations"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ string_val: "map_and_batch_fusion"
+ string_val: "noop_elimination"
+ string_val: "shuffle_and_repeat_fusion"
+ }
+ }
+ }
+}
+node {
+ name: "OptimizeDataset"
+ op: "OptimizeDataset"
+ input: "ExperimentalMapAndBatchDataset"
+ input: "optimizations"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "optimization_configs"
+ value {
+ list {
+ s: "map_vectorization:use_choose_fastest:false"
+ }
+ }
+ }
+ attr {
+ key: "output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_types"
+ value {
+ list {
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ }
+ }
+ }
+}
+node {
+ name: "ModelDataset"
+ op: "ModelDataset"
+ input: "OptimizeDataset"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "cpu_budget"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_types"
+ value {
+ list {
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ }
+ }
+ }
+}
+node {
+ name: "IteratorV2"
+ op: "IteratorV2"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_types"
+ value {
+ list {
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "MakeIterator"
+ op: "MakeIterator"
+ input: "ModelDataset"
+ input: "IteratorV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@IteratorV2"
+ }
+ }
+ }
+}
+node {
+ name: "IteratorToStringHandle"
+ op: "IteratorToStringHandle"
+ input: "IteratorV2"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "IteratorGetNext"
+ op: "IteratorGetNext"
+ input: "IteratorV2"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "output_types"
+ value {
+ list {
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ type: DT_INT32
+ }
+ }
+ }
+}
+node {
+ name: "Cast"
+ op: "Cast"
+ input: "IteratorGetNext:2"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/ExpandDims"
+ op: "ExpandDims"
+ input: "IteratorGetNext"
+ input: "bert/embeddings/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\210R\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mul"
+ input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/Assign"
+ op: "Assign"
+ input: "bert/embeddings/word_embeddings"
+ input: "bert/embeddings/word_embeddings/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/read"
+ op: "Identity"
+ input: "bert/embeddings/word_embeddings"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Reshape"
+ op: "Reshape"
+ input: "bert/embeddings/ExpandDims"
+ input: "bert/embeddings/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/GatherV2/axis"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/GatherV2"
+ op: "GatherV2"
+ input: "bert/embeddings/word_embeddings/read"
+ input: "bert/embeddings/Reshape"
+ input: "bert/embeddings/GatherV2/axis"
+ attr {
+ key: "Taxis"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tindices"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tparams"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "batch_dims"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Reshape_1"
+ op: "Reshape"
+ input: "bert/embeddings/GatherV2"
+ input: "bert/embeddings/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\002\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mul"
+ input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/Assign"
+ op: "Assign"
+ input: "bert/embeddings/token_type_embeddings"
+ input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/read"
+ op: "Identity"
+ input: "bert/embeddings/token_type_embeddings"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Reshape_2"
+ op: "Reshape"
+ input: "IteratorGetNext:4"
+ input: "bert/embeddings/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/one_hot/on_value"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/one_hot/off_value"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/one_hot/depth"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/one_hot"
+ op: "OneHot"
+ input: "bert/embeddings/Reshape_2"
+ input: "bert/embeddings/one_hot/depth"
+ input: "bert/embeddings/one_hot/on_value"
+ input: "bert/embeddings/one_hot/off_value"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "TI"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: -1
+ }
+ }
+}
+node {
+ name: "bert/embeddings/MatMul"
+ op: "MatMul"
+ input: "bert/embeddings/one_hot"
+ input: "bert/embeddings/token_type_embeddings/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Reshape_3"
+ op: "Reshape"
+ input: "bert/embeddings/MatMul"
+ input: "bert/embeddings/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/add"
+ op: "Add"
+ input: "bert/embeddings/Reshape_1"
+ input: "bert/embeddings/Reshape_3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 128
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 512
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/LessEqual"
+ op: "LessEqual"
+ input: "bert/embeddings/assert_less_equal/x"
+ input: "bert/embeddings/assert_less_equal/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/All"
+ op: "All"
+ input: "bert/embeddings/assert_less_equal/LessEqual"
+ input: "bert/embeddings/assert_less_equal/Const"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/Assert/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/Assert/Const_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "Condition x <= y did not hold element-wise:x (bert/embeddings/assert_less_equal/x:0) = "
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/Assert/Const_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "y (bert/embeddings/assert_less_equal/y:0) = "
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/Assert/Assert/data_0"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/Assert/Assert/data_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "Condition x <= y did not hold element-wise:x (bert/embeddings/assert_less_equal/x:0) = "
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/Assert/Assert/data_3"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "y (bert/embeddings/assert_less_equal/y:0) = "
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/assert_less_equal/Assert/Assert"
+ op: "Assert"
+ input: "bert/embeddings/assert_less_equal/All"
+ input: "bert/embeddings/assert_less_equal/Assert/Assert/data_0"
+ input: "bert/embeddings/assert_less_equal/Assert/Assert/data_1"
+ input: "bert/embeddings/assert_less_equal/x"
+ input: "bert/embeddings/assert_less_equal/Assert/Assert/data_3"
+ input: "bert/embeddings/assert_less_equal/y"
+ attr {
+ key: "T"
+ value {
+ list {
+ type: DT_STRING
+ type: DT_STRING
+ type: DT_INT32
+ type: DT_STRING
+ type: DT_INT32
+ }
+ }
+ }
+ attr {
+ key: "summarize"
+ value {
+ i: 3
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\002\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mul"
+ input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/Assign"
+ op: "Assign"
+ input: "bert/embeddings/position_embeddings"
+ input: "bert/embeddings/position_embeddings/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/read"
+ op: "Identity"
+ input: "bert/embeddings/position_embeddings"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Slice/begin"
+ op: "Const"
+ input: "^bert/embeddings/assert_less_equal/Assert/Assert"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\000\000\000\000\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Slice/size"
+ op: "Const"
+ input: "^bert/embeddings/assert_less_equal/Assert/Assert"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\200\000\000\000\377\377\377\377"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Slice"
+ op: "Slice"
+ input: "bert/embeddings/position_embeddings/read"
+ input: "bert/embeddings/Slice/begin"
+ input: "bert/embeddings/Slice/size"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Reshape_4/shape"
+ op: "Const"
+ input: "^bert/embeddings/assert_less_equal/Assert/Assert"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: "\001\000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/Reshape_4"
+ op: "Reshape"
+ input: "bert/embeddings/Slice"
+ input: "bert/embeddings/Reshape_4/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/add_1"
+ op: "Add"
+ input: "bert/embeddings/add"
+ input: "bert/embeddings/Reshape_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/beta"
+ input: "bert/embeddings/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/embeddings/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/gamma"
+ input: "bert/embeddings/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/embeddings/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/embeddings/add_1"
+ input: "bert/embeddings/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/embeddings/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/embeddings/add_1"
+ input: "bert/embeddings/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/embeddings/LayerNorm/moments/SquaredDifference"
+ input: "bert/embeddings/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/embeddings/LayerNorm/moments/variance"
+ input: "bert/embeddings/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/embeddings/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/embeddings/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/embeddings/add_1"
+ input: "bert/embeddings/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/embeddings/LayerNorm/moments/mean"
+ input: "bert/embeddings/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/embeddings/LayerNorm/beta/read"
+ input: "bert/embeddings/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/embeddings/LayerNorm/batchnorm/mul_1"
+ input: "bert/embeddings/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/embeddings/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/embeddings/dropout/random_uniform/max"
+ input: "bert/embeddings/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/embeddings/dropout/random_uniform/RandomUniform"
+ input: "bert/embeddings/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/random_uniform"
+ op: "Add"
+ input: "bert/embeddings/dropout/random_uniform/mul"
+ input: "bert/embeddings/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/sub"
+ op: "Sub"
+ input: "bert/embeddings/dropout/sub/x"
+ input: "bert/embeddings/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/embeddings/dropout/truediv/x"
+ input: "bert/embeddings/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/embeddings/dropout/random_uniform"
+ input: "bert/embeddings/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/mul"
+ op: "Mul"
+ input: "bert/embeddings/LayerNorm/batchnorm/add_1"
+ input: "bert/embeddings/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/Cast"
+ op: "Cast"
+ input: "bert/embeddings/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/dropout/mul_1"
+ op: "Mul"
+ input: "bert/embeddings/dropout/mul"
+ input: "bert/embeddings/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape"
+ op: "Reshape"
+ input: "IteratorGetNext:1"
+ input: "bert/encoder/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Cast"
+ op: "Cast"
+ input: "bert/encoder/Reshape"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/ones/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/ones/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/ones"
+ op: "Fill"
+ input: "bert/encoder/ones/shape_as_tensor"
+ input: "bert/encoder/ones/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/mul"
+ op: "Mul"
+ input: "bert/encoder/ones"
+ input: "bert/encoder/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\377\377\377\377\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_1"
+ op: "Reshape"
+ input: "bert/embeddings/dropout/mul_1"
+ input: "bert/encoder/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/kernel"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/bias"
+ input: "bert/encoder/layer_0/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/Reshape_1"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_0/attention/self/query/MatMul"
+ input: "bert/encoder/layer_0/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/kernel"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/bias"
+ input: "bert/encoder/layer_0/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/Reshape_1"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_0/attention/self/key/MatMul"
+ input: "bert/encoder/layer_0/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/kernel"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/bias"
+ input: "bert/encoder/layer_0/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/Reshape_1"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_0/attention/self/value/MatMul"
+ input: "bert/encoder/layer_0/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_0/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_0/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_0/attention/self/Reshape"
+ input: "bert/encoder/layer_0/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_0/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_0/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_0/attention/self/Reshape_1"
+ input: "bert/encoder/layer_0/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_0/attention/self/transpose"
+ input: "bert/encoder/layer_0/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/self/MatMul"
+ input: "bert/encoder/layer_0/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_0/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/self/sub/x"
+ input: "bert/encoder/layer_0/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/self/sub"
+ input: "bert/encoder/layer_0/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/self/Mul"
+ input: "bert/encoder/layer_0/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_0/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_0/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_0/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_0/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_0/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_0/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_0/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/self/Softmax"
+ input: "bert/encoder/layer_0/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_0/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/self/dropout/mul"
+ input: "bert/encoder/layer_0/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_0/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_0/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_0/attention/self/Reshape_2"
+ input: "bert/encoder/layer_0/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_0/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_0/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_0/attention/self/MatMul_1"
+ input: "bert/encoder/layer_0/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_0/attention/self/transpose_3"
+ input: "bert/encoder/layer_0/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/bias"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/attention/self/Reshape_3"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_0/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_0/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_0/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_0/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_0/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_0/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_0/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_0/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_0/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/output/dropout/mul"
+ input: "bert/encoder/layer_0/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/output/dropout/mul_1"
+ input: "bert/encoder/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_0/attention/output/add"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_0/attention/output/add"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/output/add"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/bias"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_0/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_0/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_0/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_0/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_0/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_0/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_0/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_0/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_0/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_0/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_0/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_0/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_0/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_0/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/kernel"
+ input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/bias"
+ input: "bert/encoder/layer_0/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_0/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_0/output/dense/MatMul"
+ input: "bert/encoder/layer_0/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_0/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_0/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_0/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_0/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_0/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/output/dropout/sub/x"
+ input: "bert/encoder/layer_0/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_0/output/dropout/truediv/x"
+ input: "bert/encoder/layer_0/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_0/output/dropout/random_uniform"
+ input: "bert/encoder/layer_0/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/output/dense/BiasAdd"
+ input: "bert/encoder/layer_0/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_0/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/output/dropout/mul"
+ input: "bert/encoder/layer_0/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_0/output/dropout/mul_1"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_0/output/add"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_0/output/add"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/output/add"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/kernel"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/bias"
+ input: "bert/encoder/layer_1/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_1/attention/self/query/MatMul"
+ input: "bert/encoder/layer_1/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/kernel"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/bias"
+ input: "bert/encoder/layer_1/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_1/attention/self/key/MatMul"
+ input: "bert/encoder/layer_1/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/kernel"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/bias"
+ input: "bert/encoder/layer_1/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_1/attention/self/value/MatMul"
+ input: "bert/encoder/layer_1/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_1/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_1/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_1/attention/self/Reshape"
+ input: "bert/encoder/layer_1/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_1/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_1/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_1/attention/self/Reshape_1"
+ input: "bert/encoder/layer_1/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_1/attention/self/transpose"
+ input: "bert/encoder/layer_1/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/self/MatMul"
+ input: "bert/encoder/layer_1/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_1/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/self/sub/x"
+ input: "bert/encoder/layer_1/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/self/sub"
+ input: "bert/encoder/layer_1/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/self/Mul"
+ input: "bert/encoder/layer_1/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_1/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_1/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_1/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_1/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_1/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_1/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_1/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/self/Softmax"
+ input: "bert/encoder/layer_1/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_1/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/self/dropout/mul"
+ input: "bert/encoder/layer_1/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_1/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_1/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_1/attention/self/Reshape_2"
+ input: "bert/encoder/layer_1/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_1/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_1/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_1/attention/self/MatMul_1"
+ input: "bert/encoder/layer_1/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_1/attention/self/transpose_3"
+ input: "bert/encoder/layer_1/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/bias"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/attention/self/Reshape_3"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_1/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_1/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_1/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_1/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_1/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_1/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_1/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_1/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_1/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/output/dropout/mul"
+ input: "bert/encoder/layer_1/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_1/attention/output/add"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_1/attention/output/add"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/output/add"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/bias"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_1/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_1/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_1/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_1/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_1/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_1/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_1/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_1/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_1/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_1/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_1/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_1/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_1/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_1/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/kernel"
+ input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/bias"
+ input: "bert/encoder/layer_1/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_1/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_1/output/dense/MatMul"
+ input: "bert/encoder/layer_1/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_1/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_1/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_1/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_1/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_1/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/output/dropout/sub/x"
+ input: "bert/encoder/layer_1/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_1/output/dropout/truediv/x"
+ input: "bert/encoder/layer_1/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_1/output/dropout/random_uniform"
+ input: "bert/encoder/layer_1/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/output/dense/BiasAdd"
+ input: "bert/encoder/layer_1/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_1/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/output/dropout/mul"
+ input: "bert/encoder/layer_1/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_1/output/dropout/mul_1"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_1/output/add"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_1/output/add"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/output/add"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/kernel"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/bias"
+ input: "bert/encoder/layer_2/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_2/attention/self/query/MatMul"
+ input: "bert/encoder/layer_2/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/kernel"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/bias"
+ input: "bert/encoder/layer_2/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_2/attention/self/key/MatMul"
+ input: "bert/encoder/layer_2/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/kernel"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/bias"
+ input: "bert/encoder/layer_2/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_2/attention/self/value/MatMul"
+ input: "bert/encoder/layer_2/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_2/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_2/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_2/attention/self/Reshape"
+ input: "bert/encoder/layer_2/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_2/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_2/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_2/attention/self/Reshape_1"
+ input: "bert/encoder/layer_2/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_2/attention/self/transpose"
+ input: "bert/encoder/layer_2/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/self/MatMul"
+ input: "bert/encoder/layer_2/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_2/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/self/sub/x"
+ input: "bert/encoder/layer_2/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/self/sub"
+ input: "bert/encoder/layer_2/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/self/Mul"
+ input: "bert/encoder/layer_2/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_2/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_2/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_2/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_2/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_2/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_2/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_2/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/self/Softmax"
+ input: "bert/encoder/layer_2/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_2/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/self/dropout/mul"
+ input: "bert/encoder/layer_2/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_2/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_2/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_2/attention/self/Reshape_2"
+ input: "bert/encoder/layer_2/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_2/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_2/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_2/attention/self/MatMul_1"
+ input: "bert/encoder/layer_2/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_2/attention/self/transpose_3"
+ input: "bert/encoder/layer_2/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/bias"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/attention/self/Reshape_3"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_2/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_2/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_2/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_2/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_2/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_2/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_2/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_2/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_2/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/output/dropout/mul"
+ input: "bert/encoder/layer_2/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_2/attention/output/add"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_2/attention/output/add"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/output/add"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/bias"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_2/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_2/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_2/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_2/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_2/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_2/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_2/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_2/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_2/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_2/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_2/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_2/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_2/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_2/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/kernel"
+ input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/bias"
+ input: "bert/encoder/layer_2/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_2/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_2/output/dense/MatMul"
+ input: "bert/encoder/layer_2/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_2/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_2/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_2/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_2/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_2/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/output/dropout/sub/x"
+ input: "bert/encoder/layer_2/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_2/output/dropout/truediv/x"
+ input: "bert/encoder/layer_2/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_2/output/dropout/random_uniform"
+ input: "bert/encoder/layer_2/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/output/dense/BiasAdd"
+ input: "bert/encoder/layer_2/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_2/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/output/dropout/mul"
+ input: "bert/encoder/layer_2/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_2/output/dropout/mul_1"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_2/output/add"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_2/output/add"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/output/add"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/kernel"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/bias"
+ input: "bert/encoder/layer_3/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_3/attention/self/query/MatMul"
+ input: "bert/encoder/layer_3/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/kernel"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/bias"
+ input: "bert/encoder/layer_3/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_3/attention/self/key/MatMul"
+ input: "bert/encoder/layer_3/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/kernel"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/bias"
+ input: "bert/encoder/layer_3/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_3/attention/self/value/MatMul"
+ input: "bert/encoder/layer_3/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_3/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_3/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_3/attention/self/Reshape"
+ input: "bert/encoder/layer_3/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_3/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_3/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_3/attention/self/Reshape_1"
+ input: "bert/encoder/layer_3/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_3/attention/self/transpose"
+ input: "bert/encoder/layer_3/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/self/MatMul"
+ input: "bert/encoder/layer_3/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_3/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/self/sub/x"
+ input: "bert/encoder/layer_3/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/self/sub"
+ input: "bert/encoder/layer_3/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/self/Mul"
+ input: "bert/encoder/layer_3/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_3/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_3/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_3/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_3/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_3/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_3/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_3/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/self/Softmax"
+ input: "bert/encoder/layer_3/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_3/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/self/dropout/mul"
+ input: "bert/encoder/layer_3/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_3/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_3/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_3/attention/self/Reshape_2"
+ input: "bert/encoder/layer_3/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_3/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_3/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_3/attention/self/MatMul_1"
+ input: "bert/encoder/layer_3/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_3/attention/self/transpose_3"
+ input: "bert/encoder/layer_3/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/bias"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/attention/self/Reshape_3"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_3/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_3/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_3/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_3/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_3/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_3/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_3/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_3/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_3/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/output/dropout/mul"
+ input: "bert/encoder/layer_3/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
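+# Note (editorial comment): the nodes below add the residual connection from the layer_2 output and
+# apply layer normalization over the 768-wide hidden dimension (moments mean/variance, epsilon ~1e-12,
+# learned gamma/beta parameters).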
+node {
+ name: "bert/encoder/layer_3/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_3/attention/output/add"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_3/attention/output/add"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/output/add"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
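+# Note (editorial comment): the feed-forward sub-layer follows — a 768 -> 3072 intermediate dense
+# projection with a GELU activation, then a 3072 -> 768 output dense projection.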
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/bias"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_3/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
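+# Note (editorial comment): the Pow/Mul/Tanh chain below computes the tanh approximation of GELU,
+# 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))), matching the constants 3.0, 0.044715,
+# 0.7978846 (= sqrt(2/pi)), 1.0 and 0.5 in the following Const nodes.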
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_3/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_3/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_3/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_3/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_3/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_3/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_3/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_3/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_3/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_3/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_3/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_3/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_3/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/kernel"
+ input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/bias"
+ input: "bert/encoder/layer_3/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_3/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_3/output/dense/MatMul"
+ input: "bert/encoder/layer_3/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_3/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_3/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_3/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_3/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_3/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/output/dropout/sub/x"
+ input: "bert/encoder/layer_3/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_3/output/dropout/truediv/x"
+ input: "bert/encoder/layer_3/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_3/output/dropout/random_uniform"
+ input: "bert/encoder/layer_3/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/output/dense/BiasAdd"
+ input: "bert/encoder/layer_3/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_3/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/output/dropout/mul"
+ input: "bert/encoder/layer_3/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_3/output/dropout/mul_1"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_3/output/add"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_3/output/add"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/output/add"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
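+# Note (editorial comment): layer_3 ends here; the same encoder block pattern repeats for layer_4,
+# beginning with the self-attention query and key projections (768 x 768 kernels).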
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/kernel"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/bias"
+ input: "bert/encoder/layer_4/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_4/attention/self/query/MatMul"
+ input: "bert/encoder/layer_4/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/kernel"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/bias"
+ input: "bert/encoder/layer_4/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_4/attention/self/key/MatMul"
+ input: "bert/encoder/layer_4/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/kernel"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/bias"
+ input: "bert/encoder/layer_4/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_4/attention/self/value/MatMul"
+ input: "bert/encoder/layer_4/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_4/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_4/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_4/attention/self/Reshape"
+ input: "bert/encoder/layer_4/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_4/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_4/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_4/attention/self/Reshape_1"
+ input: "bert/encoder/layer_4/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_4/attention/self/transpose"
+ input: "bert/encoder/layer_4/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/self/MatMul"
+ input: "bert/encoder/layer_4/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_4/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/self/sub/x"
+ input: "bert/encoder/layer_4/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/self/sub"
+ input: "bert/encoder/layer_4/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/self/Mul"
+ input: "bert/encoder/layer_4/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_4/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_4/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_4/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_4/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_4/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_4/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_4/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/self/Softmax"
+ input: "bert/encoder/layer_4/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_4/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/self/dropout/mul"
+ input: "bert/encoder/layer_4/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_4/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_4/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_4/attention/self/Reshape_2"
+ input: "bert/encoder/layer_4/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_4/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_4/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_4/attention/self/MatMul_1"
+ input: "bert/encoder/layer_4/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_4/attention/self/transpose_3"
+ input: "bert/encoder/layer_4/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/bias"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/attention/self/Reshape_3"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_4/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_4/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_4/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_4/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_4/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_4/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_4/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_4/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_4/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/output/dropout/mul"
+ input: "bert/encoder/layer_4/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_4/attention/output/add"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_4/attention/output/add"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/output/add"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/bias"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_4/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_4/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_4/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_4/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_4/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_4/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_4/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_4/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_4/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_4/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_4/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_4/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_4/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_4/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/kernel"
+ input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/bias"
+ input: "bert/encoder/layer_4/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_4/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_4/output/dense/MatMul"
+ input: "bert/encoder/layer_4/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_4/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_4/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_4/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_4/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_4/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/output/dropout/sub/x"
+ input: "bert/encoder/layer_4/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_4/output/dropout/truediv/x"
+ input: "bert/encoder/layer_4/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_4/output/dropout/random_uniform"
+ input: "bert/encoder/layer_4/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/output/dense/BiasAdd"
+ input: "bert/encoder/layer_4/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_4/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/output/dropout/mul"
+ input: "bert/encoder/layer_4/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_4/output/dropout/mul_1"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_4/output/add"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_4/output/add"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/output/add"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/kernel"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/bias"
+ input: "bert/encoder/layer_5/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_5/attention/self/query/MatMul"
+ input: "bert/encoder/layer_5/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/kernel"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/bias"
+ input: "bert/encoder/layer_5/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_5/attention/self/key/MatMul"
+ input: "bert/encoder/layer_5/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/kernel"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/bias"
+ input: "bert/encoder/layer_5/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_5/attention/self/value/MatMul"
+ input: "bert/encoder/layer_5/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_5/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_5/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_5/attention/self/Reshape"
+ input: "bert/encoder/layer_5/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_5/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_5/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_5/attention/self/Reshape_1"
+ input: "bert/encoder/layer_5/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_5/attention/self/transpose"
+ input: "bert/encoder/layer_5/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/self/MatMul"
+ input: "bert/encoder/layer_5/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_5/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/self/sub/x"
+ input: "bert/encoder/layer_5/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/self/sub"
+ input: "bert/encoder/layer_5/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/self/Mul"
+ input: "bert/encoder/layer_5/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_5/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_5/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_5/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_5/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_5/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_5/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_5/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/self/Softmax"
+ input: "bert/encoder/layer_5/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_5/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/self/dropout/mul"
+ input: "bert/encoder/layer_5/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_5/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_5/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_5/attention/self/Reshape_2"
+ input: "bert/encoder/layer_5/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_5/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_5/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_5/attention/self/MatMul_1"
+ input: "bert/encoder/layer_5/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_5/attention/self/transpose_3"
+ input: "bert/encoder/layer_5/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/bias"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/attention/self/Reshape_3"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_5/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_5/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_5/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_5/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_5/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_5/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_5/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_5/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_5/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/output/dropout/mul"
+ input: "bert/encoder/layer_5/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_5/attention/output/add"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_5/attention/output/add"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/output/add"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/bias"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_5/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_5/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_5/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_5/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_5/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_5/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_5/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_5/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_5/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_5/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_5/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_5/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_5/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_5/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/kernel"
+ input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/bias"
+ input: "bert/encoder/layer_5/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_5/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_5/output/dense/MatMul"
+ input: "bert/encoder/layer_5/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_5/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_5/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_5/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_5/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_5/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/output/dropout/sub/x"
+ input: "bert/encoder/layer_5/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_5/output/dropout/truediv/x"
+ input: "bert/encoder/layer_5/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_5/output/dropout/random_uniform"
+ input: "bert/encoder/layer_5/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/output/dense/BiasAdd"
+ input: "bert/encoder/layer_5/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_5/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/output/dropout/mul"
+ input: "bert/encoder/layer_5/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_5/output/dropout/mul_1"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_5/output/add"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_5/output/add"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/output/add"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/kernel"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/bias"
+ input: "bert/encoder/layer_6/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_6/attention/self/query/MatMul"
+ input: "bert/encoder/layer_6/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/kernel"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/bias"
+ input: "bert/encoder/layer_6/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_6/attention/self/key/MatMul"
+ input: "bert/encoder/layer_6/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/kernel"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/bias"
+ input: "bert/encoder/layer_6/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_6/attention/self/value/MatMul"
+ input: "bert/encoder/layer_6/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_6/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_6/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_6/attention/self/Reshape"
+ input: "bert/encoder/layer_6/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_6/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_6/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_6/attention/self/Reshape_1"
+ input: "bert/encoder/layer_6/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_6/attention/self/transpose"
+ input: "bert/encoder/layer_6/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/self/MatMul"
+ input: "bert/encoder/layer_6/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_6/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/self/sub/x"
+ input: "bert/encoder/layer_6/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/self/sub"
+ input: "bert/encoder/layer_6/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/self/Mul"
+ input: "bert/encoder/layer_6/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_6/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_6/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_6/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_6/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_6/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_6/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_6/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/self/Softmax"
+ input: "bert/encoder/layer_6/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_6/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/self/dropout/mul"
+ input: "bert/encoder/layer_6/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_6/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_6/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_6/attention/self/Reshape_2"
+ input: "bert/encoder/layer_6/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_6/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_6/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_6/attention/self/MatMul_1"
+ input: "bert/encoder/layer_6/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_6/attention/self/transpose_3"
+ input: "bert/encoder/layer_6/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/bias"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/attention/self/Reshape_3"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_6/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_6/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_6/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_6/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_6/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_6/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_6/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_6/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_6/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/output/dropout/mul"
+ input: "bert/encoder/layer_6/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_6/attention/output/add"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_6/attention/output/add"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/output/add"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/bias"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_6/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_6/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_6/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_6/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_6/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_6/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_6/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_6/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_6/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_6/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_6/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_6/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_6/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_6/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/kernel"
+ input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/bias"
+ input: "bert/encoder/layer_6/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_6/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_6/output/dense/MatMul"
+ input: "bert/encoder/layer_6/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_6/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_6/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_6/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_6/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_6/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/output/dropout/sub/x"
+ input: "bert/encoder/layer_6/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_6/output/dropout/truediv/x"
+ input: "bert/encoder/layer_6/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_6/output/dropout/random_uniform"
+ input: "bert/encoder/layer_6/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/output/dense/BiasAdd"
+ input: "bert/encoder/layer_6/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_6/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/output/dropout/mul"
+ input: "bert/encoder/layer_6/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_6/output/dropout/mul_1"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_6/output/add"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_6/output/add"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/output/add"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/kernel"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/bias"
+ input: "bert/encoder/layer_7/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_7/attention/self/query/MatMul"
+ input: "bert/encoder/layer_7/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/kernel"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/bias"
+ input: "bert/encoder/layer_7/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_7/attention/self/key/MatMul"
+ input: "bert/encoder/layer_7/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/kernel"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/bias"
+ input: "bert/encoder/layer_7/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_7/attention/self/value/MatMul"
+ input: "bert/encoder/layer_7/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_7/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_7/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_7/attention/self/Reshape"
+ input: "bert/encoder/layer_7/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_7/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_7/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_7/attention/self/Reshape_1"
+ input: "bert/encoder/layer_7/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_7/attention/self/transpose"
+ input: "bert/encoder/layer_7/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/self/MatMul"
+ input: "bert/encoder/layer_7/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_7/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/self/sub/x"
+ input: "bert/encoder/layer_7/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/self/sub"
+ input: "bert/encoder/layer_7/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/self/Mul"
+ input: "bert/encoder/layer_7/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_7/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_7/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_7/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_7/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_7/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_7/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_7/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/self/Softmax"
+ input: "bert/encoder/layer_7/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_7/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/self/dropout/mul"
+ input: "bert/encoder/layer_7/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_7/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_7/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_7/attention/self/Reshape_2"
+ input: "bert/encoder/layer_7/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_7/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_7/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_7/attention/self/MatMul_1"
+ input: "bert/encoder/layer_7/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_7/attention/self/transpose_3"
+ input: "bert/encoder/layer_7/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/bias"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/attention/self/Reshape_3"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_7/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_7/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_7/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_7/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_7/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_7/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_7/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_7/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_7/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/output/dropout/mul"
+ input: "bert/encoder/layer_7/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_7/attention/output/add"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_7/attention/output/add"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/output/add"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/bias"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_7/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_7/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_7/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_7/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_7/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_7/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_7/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_7/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_7/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_7/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_7/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_7/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_7/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_7/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/kernel"
+ input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/bias"
+ input: "bert/encoder/layer_7/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_7/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_7/output/dense/MatMul"
+ input: "bert/encoder/layer_7/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_7/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_7/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_7/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_7/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_7/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/output/dropout/sub/x"
+ input: "bert/encoder/layer_7/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_7/output/dropout/truediv/x"
+ input: "bert/encoder/layer_7/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_7/output/dropout/random_uniform"
+ input: "bert/encoder/layer_7/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/output/dense/BiasAdd"
+ input: "bert/encoder/layer_7/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_7/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/output/dropout/mul"
+ input: "bert/encoder/layer_7/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_7/output/dropout/mul_1"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_7/output/add"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_7/output/add"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/output/add"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/kernel"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/bias"
+ input: "bert/encoder/layer_8/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_8/attention/self/query/MatMul"
+ input: "bert/encoder/layer_8/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/kernel"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/bias"
+ input: "bert/encoder/layer_8/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_8/attention/self/key/MatMul"
+ input: "bert/encoder/layer_8/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/kernel"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/bias"
+ input: "bert/encoder/layer_8/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_8/attention/self/value/MatMul"
+ input: "bert/encoder/layer_8/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_8/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_8/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_8/attention/self/Reshape"
+ input: "bert/encoder/layer_8/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_8/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_8/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_8/attention/self/Reshape_1"
+ input: "bert/encoder/layer_8/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_8/attention/self/transpose"
+ input: "bert/encoder/layer_8/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/self/MatMul"
+ input: "bert/encoder/layer_8/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_8/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/self/sub/x"
+ input: "bert/encoder/layer_8/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/self/sub"
+ input: "bert/encoder/layer_8/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/self/Mul"
+ input: "bert/encoder/layer_8/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_8/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_8/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_8/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_8/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_8/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_8/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_8/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/self/Softmax"
+ input: "bert/encoder/layer_8/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_8/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/self/dropout/mul"
+ input: "bert/encoder/layer_8/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_8/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_8/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_8/attention/self/Reshape_2"
+ input: "bert/encoder/layer_8/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_8/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_8/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_8/attention/self/MatMul_1"
+ input: "bert/encoder/layer_8/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_8/attention/self/transpose_3"
+ input: "bert/encoder/layer_8/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/bias"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/attention/self/Reshape_3"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_8/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_8/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_8/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_8/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_8/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_8/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_8/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_8/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_8/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/output/dropout/mul"
+ input: "bert/encoder/layer_8/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_8/attention/output/add"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_8/attention/output/add"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/output/add"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/bias"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_8/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_8/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_8/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_8/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_8/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_8/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_8/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_8/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_8/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_8/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_8/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_8/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_8/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_8/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/kernel"
+ input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/bias"
+ input: "bert/encoder/layer_8/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_8/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_8/output/dense/MatMul"
+ input: "bert/encoder/layer_8/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_8/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_8/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_8/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_8/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_8/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/output/dropout/sub/x"
+ input: "bert/encoder/layer_8/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_8/output/dropout/truediv/x"
+ input: "bert/encoder/layer_8/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_8/output/dropout/random_uniform"
+ input: "bert/encoder/layer_8/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/output/dense/BiasAdd"
+ input: "bert/encoder/layer_8/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_8/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/output/dropout/mul"
+ input: "bert/encoder/layer_8/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_8/output/dropout/mul_1"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_8/output/add"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_8/output/add"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/output/add"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/kernel"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/bias"
+ input: "bert/encoder/layer_9/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_9/attention/self/query/MatMul"
+ input: "bert/encoder/layer_9/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/kernel"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/bias"
+ input: "bert/encoder/layer_9/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_9/attention/self/key/MatMul"
+ input: "bert/encoder/layer_9/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/kernel"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/bias"
+ input: "bert/encoder/layer_9/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_9/attention/self/value/MatMul"
+ input: "bert/encoder/layer_9/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_9/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_9/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_9/attention/self/Reshape"
+ input: "bert/encoder/layer_9/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_9/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_9/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_9/attention/self/Reshape_1"
+ input: "bert/encoder/layer_9/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_9/attention/self/transpose"
+ input: "bert/encoder/layer_9/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/self/MatMul"
+ input: "bert/encoder/layer_9/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_9/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/self/sub/x"
+ input: "bert/encoder/layer_9/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/self/sub"
+ input: "bert/encoder/layer_9/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/self/Mul"
+ input: "bert/encoder/layer_9/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_9/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_9/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_9/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_9/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_9/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_9/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_9/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/self/Softmax"
+ input: "bert/encoder/layer_9/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_9/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/self/dropout/mul"
+ input: "bert/encoder/layer_9/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_9/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_9/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_9/attention/self/Reshape_2"
+ input: "bert/encoder/layer_9/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_9/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_9/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_9/attention/self/MatMul_1"
+ input: "bert/encoder/layer_9/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_9/attention/self/transpose_3"
+ input: "bert/encoder/layer_9/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/bias"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/attention/self/Reshape_3"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_9/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_9/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_9/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_9/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_9/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_9/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_9/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_9/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_9/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/output/dropout/mul"
+ input: "bert/encoder/layer_9/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_9/attention/output/add"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_9/attention/output/add"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/output/add"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/bias"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_9/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_9/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_9/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_9/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_9/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_9/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_9/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_9/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_9/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_9/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_9/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_9/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_9/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_9/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/kernel"
+ input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/bias"
+ input: "bert/encoder/layer_9/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_9/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_9/output/dense/MatMul"
+ input: "bert/encoder/layer_9/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_9/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_9/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_9/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_9/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_9/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/output/dropout/sub/x"
+ input: "bert/encoder/layer_9/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_9/output/dropout/truediv/x"
+ input: "bert/encoder/layer_9/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_9/output/dropout/random_uniform"
+ input: "bert/encoder/layer_9/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/output/dense/BiasAdd"
+ input: "bert/encoder/layer_9/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_9/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/output/dropout/mul"
+ input: "bert/encoder/layer_9/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_9/output/dropout/mul_1"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_9/output/add"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_9/output/add"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/output/add"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/kernel"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/bias"
+ input: "bert/encoder/layer_10/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_10/attention/self/query/MatMul"
+ input: "bert/encoder/layer_10/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/kernel"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/bias"
+ input: "bert/encoder/layer_10/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_10/attention/self/key/MatMul"
+ input: "bert/encoder/layer_10/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/kernel"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/bias"
+ input: "bert/encoder/layer_10/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_10/attention/self/value/MatMul"
+ input: "bert/encoder/layer_10/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_10/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_10/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_10/attention/self/Reshape"
+ input: "bert/encoder/layer_10/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_10/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_10/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_10/attention/self/Reshape_1"
+ input: "bert/encoder/layer_10/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_10/attention/self/transpose"
+ input: "bert/encoder/layer_10/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/self/MatMul"
+ input: "bert/encoder/layer_10/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_10/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/self/sub/x"
+ input: "bert/encoder/layer_10/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/self/sub"
+ input: "bert/encoder/layer_10/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/self/Mul"
+ input: "bert/encoder/layer_10/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_10/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_10/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_10/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_10/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_10/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_10/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_10/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/self/Softmax"
+ input: "bert/encoder/layer_10/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_10/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/self/dropout/mul"
+ input: "bert/encoder/layer_10/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_10/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_10/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_10/attention/self/Reshape_2"
+ input: "bert/encoder/layer_10/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_10/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_10/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_10/attention/self/MatMul_1"
+ input: "bert/encoder/layer_10/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_10/attention/self/transpose_3"
+ input: "bert/encoder/layer_10/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/bias"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/attention/self/Reshape_3"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_10/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_10/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_10/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_10/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_10/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_10/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_10/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_10/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_10/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/output/dropout/mul"
+ input: "bert/encoder/layer_10/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_10/attention/output/add"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_10/attention/output/add"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/output/add"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/bias"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_10/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_10/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_10/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_10/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_10/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_10/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_10/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_10/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_10/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_10/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_10/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_10/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_10/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_10/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/kernel"
+ input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/bias"
+ input: "bert/encoder/layer_10/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_10/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_10/output/dense/MatMul"
+ input: "bert/encoder/layer_10/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_10/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_10/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_10/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_10/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_10/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/output/dropout/sub/x"
+ input: "bert/encoder/layer_10/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_10/output/dropout/truediv/x"
+ input: "bert/encoder/layer_10/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_10/output/dropout/random_uniform"
+ input: "bert/encoder/layer_10/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/output/dense/BiasAdd"
+ input: "bert/encoder/layer_10/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_10/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/output/dropout/mul"
+ input: "bert/encoder/layer_10/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_10/output/dropout/mul_1"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_10/output/add"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_10/output/add"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/output/add"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/kernel"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/query/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/bias"
+ input: "bert/encoder/layer_11/attention/self/query/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/query/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_11/attention/self/query/MatMul"
+ input: "bert/encoder/layer_11/attention/self/query/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/kernel"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/key/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/bias"
+ input: "bert/encoder/layer_11/attention/self/key/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/key/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_11/attention/self/key/MatMul"
+ input: "bert/encoder/layer_11/attention/self/key/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/kernel"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/value/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/bias"
+ input: "bert/encoder/layer_11/attention/self/value/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/value/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_11/attention/self/value/MatMul"
+ input: "bert/encoder/layer_11/attention/self/value/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Reshape"
+ op: "Reshape"
+ input: "bert/encoder/layer_11/attention/self/query/BiasAdd"
+ input: "bert/encoder/layer_11/attention/self/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/transpose/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/transpose"
+ op: "Transpose"
+ input: "bert/encoder/layer_11/attention/self/Reshape"
+ input: "bert/encoder/layer_11/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Reshape_1/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Reshape_1"
+ op: "Reshape"
+ input: "bert/encoder/layer_11/attention/self/key/BiasAdd"
+ input: "bert/encoder/layer_11/attention/self/Reshape_1/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/transpose_1/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/transpose_1"
+ op: "Transpose"
+ input: "bert/encoder/layer_11/attention/self/Reshape_1"
+ input: "bert/encoder/layer_11/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/MatMul"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_11/attention/self/transpose"
+ input: "bert/encoder/layer_11/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Mul/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.125
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/self/MatMul"
+ input: "bert/encoder/layer_11/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/ExpandDims"
+ op: "ExpandDims"
+ input: "bert/encoder/mul"
+ input: "bert/encoder/layer_11/attention/self/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/self/sub/x"
+ input: "bert/encoder/layer_11/attention/self/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/mul_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: -10000.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/self/sub"
+ input: "bert/encoder/layer_11/attention/self/mul_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/add"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/self/Mul"
+ input: "bert/encoder/layer_11/attention/self/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Softmax"
+ op: "Softmax"
+ input: "bert/encoder/layer_11/attention/self/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_11/attention/self/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/max"
+ input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/self/dropout/sub/x"
+ input: "bert/encoder/layer_11/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_11/attention/self/dropout/truediv/x"
+ input: "bert/encoder/layer_11/attention/self/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_11/attention/self/dropout/random_uniform"
+ input: "bert/encoder/layer_11/attention/self/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/self/Softmax"
+ input: "bert/encoder/layer_11/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_11/attention/self/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/self/dropout/mul"
+ input: "bert/encoder/layer_11/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_11/attention/self/value/BiasAdd"
+ input: "bert/encoder/layer_11/attention/self/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/transpose_2/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/transpose_2"
+ op: "Transpose"
+ input: "bert/encoder/layer_11/attention/self/Reshape_2"
+ input: "bert/encoder/layer_11/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_11/attention/self/dropout/mul_1"
+ input: "bert/encoder/layer_11/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/transpose_3/perm"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/transpose_3"
+ op: "Transpose"
+ input: "bert/encoder/layer_11/attention/self/MatMul_1"
+ input: "bert/encoder/layer_11/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_11/attention/self/transpose_3"
+ input: "bert/encoder/layer_11/attention/self/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/bias"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_11/attention/self/Reshape_3"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_11/attention/output/dense/MatMul"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_11/attention/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/output/dropout/sub/x"
+ input: "bert/encoder/layer_11/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_11/attention/output/dropout/truediv/x"
+ input: "bert/encoder/layer_11/attention/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_11/attention/output/dropout/random_uniform"
+ input: "bert/encoder/layer_11/attention/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/output/dense/BiasAdd"
+ input: "bert/encoder/layer_11/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_11/attention/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/output/dropout/mul"
+ input: "bert/encoder/layer_11/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/output/dropout/mul_1"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_11/attention/output/add"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_11/attention/output/add"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/output/add"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/bias"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/intermediate/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_11/intermediate/dense/MatMul"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/Pow/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 3.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_11/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_11/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.044714998453855515
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/intermediate/dense/mul/x"
+ input: "bert/encoder/layer_11/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/add"
+ op: "Add"
+ input: "bert/encoder/layer_11/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_11/intermediate/dense/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/mul_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.7978845834732056
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/intermediate/dense/mul_1/x"
+ input: "bert/encoder/layer_11/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/Tanh"
+ op: "Tanh"
+ input: "bert/encoder/layer_11/intermediate/dense/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/add_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_11/intermediate/dense/add_1/x"
+ input: "bert/encoder/layer_11/intermediate/dense/Tanh"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/mul_2/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.5
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_11/intermediate/dense/mul_2/x"
+ input: "bert/encoder/layer_11/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/mul_3"
+ op: "Mul"
+ input: "bert/encoder/layer_11/intermediate/dense/BiasAdd"
+ input: "bert/encoder/layer_11/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/kernel"
+ input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/bias"
+ input: "bert/encoder/layer_11/output/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/MatMul"
+ op: "MatMul"
+ input: "bert/encoder/layer_11/intermediate/dense/mul_3"
+ input: "bert/encoder/layer_11/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/encoder/layer_11/output/dense/MatMul"
+ input: "bert/encoder/layer_11/output/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "bert/encoder/layer_11/output/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/output/dropout/random_uniform/max"
+ input: "bert/encoder/layer_11/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/output/dropout/random_uniform/RandomUniform"
+ input: "bert/encoder/layer_11/output/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/random_uniform"
+ op: "Add"
+ input: "bert/encoder/layer_11/output/dropout/random_uniform/mul"
+ input: "bert/encoder/layer_11/output/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/output/dropout/sub/x"
+ input: "bert/encoder/layer_11/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/truediv"
+ op: "RealDiv"
+ input: "bert/encoder/layer_11/output/dropout/truediv/x"
+ input: "bert/encoder/layer_11/output/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "bert/encoder/layer_11/output/dropout/random_uniform"
+ input: "bert/encoder/layer_11/output/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/output/dense/BiasAdd"
+ input: "bert/encoder/layer_11/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/Cast"
+ op: "Cast"
+ input: "bert/encoder/layer_11/output/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dropout/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/output/dropout/mul"
+ input: "bert/encoder/layer_11/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/add"
+ op: "Add"
+ input: "bert/encoder/layer_11/output/dropout/mul_1"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/Initializer/ones"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/Initializer/ones"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/moments/mean/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/moments/mean"
+ op: "Mean"
+ input: "bert/encoder/layer_11/output/add"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/mean/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/moments/StopGradient"
+ op: "StopGradient"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference"
+ op: "SquaredDifference"
+ input: "bert/encoder/layer_11/output/add"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/StopGradient"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/moments/variance/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/moments/variance"
+ op: "Mean"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/variance/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999960041972e-13
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add"
+ op: "Add"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/variance"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt"
+ op: "Rsqrt"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul"
+ op: "Mul"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/output/add"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2"
+ op: "Mul"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/mean"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/read"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1"
+ op: "Add"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_2/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_2"
+ op: "Reshape"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_2/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_3/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_3"
+ op: "Reshape"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_3/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_4/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_4"
+ op: "Reshape"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_4/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_5/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_5"
+ op: "Reshape"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_5/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_6/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_6"
+ op: "Reshape"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_6/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_7/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_7"
+ op: "Reshape"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_7/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_8/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_8"
+ op: "Reshape"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_8/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_9/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_9"
+ op: "Reshape"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_9/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_10/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_10"
+ op: "Reshape"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_10/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_11/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_11"
+ op: "Reshape"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_11/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_12/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_12"
+ op: "Reshape"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_12/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_13/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/Reshape_13"
+ op: "Reshape"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1"
+ input: "bert/encoder/Reshape_13/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: "\000\000\000\000\001\000\000\000\000\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: "\001\000\000\000\001\000\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/strided_slice"
+ op: "StridedSlice"
+ input: "bert/encoder/Reshape_13"
+ input: "bert/pooler/strided_slice/stack"
+ input: "bert/pooler/strided_slice/stack_1"
+ input: "bert/pooler/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 5
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 5
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/pooler/Squeeze"
+ op: "Squeeze"
+ input: "bert/pooler/strided_slice"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "squeeze_dims"
+ value {
+ list {
+ i: 1
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "bert/pooler/dense/kernel/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "bert/pooler/dense/kernel/Initializer/truncated_normal/TruncatedNormal"
+ input: "bert/pooler/dense/kernel/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/Initializer/truncated_normal"
+ op: "Add"
+ input: "bert/pooler/dense/kernel/Initializer/truncated_normal/mul"
+ input: "bert/pooler/dense/kernel/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/Assign"
+ op: "Assign"
+ input: "bert/pooler/dense/kernel"
+ input: "bert/pooler/dense/kernel/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/read"
+ op: "Identity"
+ input: "bert/pooler/dense/kernel"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/Assign"
+ op: "Assign"
+ input: "bert/pooler/dense/bias"
+ input: "bert/pooler/dense/bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/read"
+ op: "Identity"
+ input: "bert/pooler/dense/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/MatMul"
+ op: "MatMul"
+ input: "bert/pooler/Squeeze"
+ input: "bert/pooler/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/BiasAdd"
+ op: "BiasAdd"
+ input: "bert/pooler/dense/MatMul"
+ input: "bert/pooler/dense/bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/Tanh"
+ op: "Tanh"
+ input: "bert/pooler/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "output_weights/Initializer/truncated_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\003\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "output_weights/Initializer/truncated_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "output_weights/Initializer/truncated_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.019999999552965164
+ }
+ }
+ }
+}
+node {
+ name: "output_weights/Initializer/truncated_normal/TruncatedNormal"
+ op: "TruncatedNormal"
+ input: "output_weights/Initializer/truncated_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "output_weights/Initializer/truncated_normal/mul"
+ op: "Mul"
+ input: "output_weights/Initializer/truncated_normal/TruncatedNormal"
+ input: "output_weights/Initializer/truncated_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "output_weights/Initializer/truncated_normal"
+ op: "Add"
+ input: "output_weights/Initializer/truncated_normal/mul"
+ input: "output_weights/Initializer/truncated_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "output_weights"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "output_weights/Assign"
+ op: "Assign"
+ input: "output_weights"
+ input: "output_weights/Initializer/truncated_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "output_weights/read"
+ op: "Identity"
+ input: "output_weights"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "output_bias/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "output_bias"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "output_bias/Assign"
+ op: "Assign"
+ input: "output_bias"
+ input: "output_bias/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "output_bias/read"
+ op: "Identity"
+ input: "output_bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/rate"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: " \000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/random_uniform/min"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/random_uniform/max"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/random_uniform/RandomUniform"
+ op: "RandomUniform"
+ input: "loss/dropout/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "loss/dropout/random_uniform/sub"
+ op: "Sub"
+ input: "loss/dropout/random_uniform/max"
+ input: "loss/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/random_uniform/mul"
+ op: "Mul"
+ input: "loss/dropout/random_uniform/RandomUniform"
+ input: "loss/dropout/random_uniform/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/random_uniform"
+ op: "Add"
+ input: "loss/dropout/random_uniform/mul"
+ input: "loss/dropout/random_uniform/min"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/sub"
+ op: "Sub"
+ input: "loss/dropout/sub/x"
+ input: "loss/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/truediv"
+ op: "RealDiv"
+ input: "loss/dropout/truediv/x"
+ input: "loss/dropout/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/GreaterEqual"
+ op: "GreaterEqual"
+ input: "loss/dropout/random_uniform"
+ input: "loss/dropout/rate"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/mul"
+ op: "Mul"
+ input: "bert/pooler/dense/Tanh"
+ input: "loss/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/Cast"
+ op: "Cast"
+ input: "loss/dropout/GreaterEqual"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/dropout/mul_1"
+ op: "Mul"
+ input: "loss/dropout/mul"
+ input: "loss/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/MatMul"
+ op: "MatMul"
+ input: "loss/dropout/mul_1"
+ input: "output_weights/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "loss/BiasAdd"
+ op: "BiasAdd"
+ input: "loss/MatMul"
+ input: "output_bias/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "loss/Softmax"
+ op: "Softmax"
+ input: "loss/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/LogSoftmax"
+ op: "LogSoftmax"
+ input: "loss/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/one_hot/on_value"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "loss/one_hot/off_value"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "loss/one_hot/depth"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 3
+ }
+ }
+ }
+}
+node {
+ name: "loss/one_hot"
+ op: "OneHot"
+ input: "IteratorGetNext:3"
+ input: "loss/one_hot/depth"
+ input: "loss/one_hot/on_value"
+ input: "loss/one_hot/off_value"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "TI"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: -1
+ }
+ }
+}
+node {
+ name: "loss/mul"
+ op: "Mul"
+ input: "loss/one_hot"
+ input: "loss/LogSoftmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "loss/Sum"
+ op: "Sum"
+ input: "loss/mul"
+ input: "loss/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "loss/Neg"
+ op: "Neg"
+ input: "loss/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "loss/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "loss/Mean"
+ op: "Mean"
+ input: "loss/Neg"
+ input: "loss/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/embeddings/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer"
+ op: "RestoreV2"
+ input: "checkpoint_initializer/prefix"
+ input: "checkpoint_initializer/tensor_names"
+ input: "checkpoint_initializer/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/beta"
+ input: "checkpoint_initializer"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_1/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_1/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/embeddings/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_1/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_1"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_1/prefix"
+ input: "checkpoint_initializer_1/tensor_names"
+ input: "checkpoint_initializer_1/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_1"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/gamma"
+ input: "checkpoint_initializer_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_2/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_2/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/embeddings/position_embeddings"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_2/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_2"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_2/prefix"
+ input: "checkpoint_initializer_2/tensor_names"
+ input: "checkpoint_initializer_2/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_2"
+ op: "Assign"
+ input: "bert/embeddings/position_embeddings"
+ input: "checkpoint_initializer_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_3/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_3/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_3/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_3"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_3/prefix"
+ input: "checkpoint_initializer_3/tensor_names"
+ input: "checkpoint_initializer_3/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_3"
+ op: "Assign"
+ input: "bert/embeddings/token_type_embeddings"
+ input: "checkpoint_initializer_3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_4/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_4/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/embeddings/word_embeddings"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_4/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_4"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_4/prefix"
+ input: "checkpoint_initializer_4/tensor_names"
+ input: "checkpoint_initializer_4/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_4"
+ op: "Assign"
+ input: "bert/embeddings/word_embeddings"
+ input: "checkpoint_initializer_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_5/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_5/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_5/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_5"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_5/prefix"
+ input: "checkpoint_initializer_5/tensor_names"
+ input: "checkpoint_initializer_5/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_5"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_5"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_6/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_6/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_6/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_6"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_6/prefix"
+ input: "checkpoint_initializer_6/tensor_names"
+ input: "checkpoint_initializer_6/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_6"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_6"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_7/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_7/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_7/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_7"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_7/prefix"
+ input: "checkpoint_initializer_7/tensor_names"
+ input: "checkpoint_initializer_7/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_7"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/bias"
+ input: "checkpoint_initializer_7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_8/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_8/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_8/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_8"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_8/prefix"
+ input: "checkpoint_initializer_8/tensor_names"
+ input: "checkpoint_initializer_8/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_8"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel"
+ input: "checkpoint_initializer_8"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_9/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_9/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_9/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_9"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_9/prefix"
+ input: "checkpoint_initializer_9/tensor_names"
+ input: "checkpoint_initializer_9/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_9"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/bias"
+ input: "checkpoint_initializer_9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_10/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_10/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_10/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_10"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_10/prefix"
+ input: "checkpoint_initializer_10/tensor_names"
+ input: "checkpoint_initializer_10/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_10"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/kernel"
+ input: "checkpoint_initializer_10"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_11/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_11/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_11/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_11"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_11/prefix"
+ input: "checkpoint_initializer_11/tensor_names"
+ input: "checkpoint_initializer_11/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_11"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/bias"
+ input: "checkpoint_initializer_11"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_12/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_12/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_12/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_12"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_12/prefix"
+ input: "checkpoint_initializer_12/tensor_names"
+ input: "checkpoint_initializer_12/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_12"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/kernel"
+ input: "checkpoint_initializer_12"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_13/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_13/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_13/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_13"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_13/prefix"
+ input: "checkpoint_initializer_13/tensor_names"
+ input: "checkpoint_initializer_13/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_13"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/bias"
+ input: "checkpoint_initializer_13"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_14/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_14/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_14/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_14"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_14/prefix"
+ input: "checkpoint_initializer_14/tensor_names"
+ input: "checkpoint_initializer_14/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_14"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/kernel"
+ input: "checkpoint_initializer_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_15/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_15/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_15/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_15"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_15/prefix"
+ input: "checkpoint_initializer_15/tensor_names"
+ input: "checkpoint_initializer_15/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_15"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/bias"
+ input: "checkpoint_initializer_15"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_16/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_16/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_16/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_16"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_16/prefix"
+ input: "checkpoint_initializer_16/tensor_names"
+ input: "checkpoint_initializer_16/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_16"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel"
+ input: "checkpoint_initializer_16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_17/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_17/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_17/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_17"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_17/prefix"
+ input: "checkpoint_initializer_17/tensor_names"
+ input: "checkpoint_initializer_17/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_17"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta"
+ input: "checkpoint_initializer_17"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_18/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_18/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_18/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_18"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_18/prefix"
+ input: "checkpoint_initializer_18/tensor_names"
+ input: "checkpoint_initializer_18/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_18"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_18"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_19/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_19/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_19/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_19"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_19/prefix"
+ input: "checkpoint_initializer_19/tensor_names"
+ input: "checkpoint_initializer_19/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_19"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/bias"
+ input: "checkpoint_initializer_19"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_20/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_20/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_20/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_20"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_20/prefix"
+ input: "checkpoint_initializer_20/tensor_names"
+ input: "checkpoint_initializer_20/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_20"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/kernel"
+ input: "checkpoint_initializer_20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_21/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_21/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_21/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_21"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_21/prefix"
+ input: "checkpoint_initializer_21/tensor_names"
+ input: "checkpoint_initializer_21/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_21"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_21"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_22/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_22/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_22/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_22"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_22/prefix"
+ input: "checkpoint_initializer_22/tensor_names"
+ input: "checkpoint_initializer_22/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_22"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_22"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_23/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_23/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_23/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_23"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_23/prefix"
+ input: "checkpoint_initializer_23/tensor_names"
+ input: "checkpoint_initializer_23/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_23"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/bias"
+ input: "checkpoint_initializer_23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_24/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_24/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_24/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_24"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_24/prefix"
+ input: "checkpoint_initializer_24/tensor_names"
+ input: "checkpoint_initializer_24/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_24"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel"
+ input: "checkpoint_initializer_24"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_25/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_25/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_25/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_25"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_25/prefix"
+ input: "checkpoint_initializer_25/tensor_names"
+ input: "checkpoint_initializer_25/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_25"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/bias"
+ input: "checkpoint_initializer_25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_26/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_26/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_26/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_26"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_26/prefix"
+ input: "checkpoint_initializer_26/tensor_names"
+ input: "checkpoint_initializer_26/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_26"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/kernel"
+ input: "checkpoint_initializer_26"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_27/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_27/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_27/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_27"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_27/prefix"
+ input: "checkpoint_initializer_27/tensor_names"
+ input: "checkpoint_initializer_27/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_27"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/bias"
+ input: "checkpoint_initializer_27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_28/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_28/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_28/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_28"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_28/prefix"
+ input: "checkpoint_initializer_28/tensor_names"
+ input: "checkpoint_initializer_28/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_28"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/kernel"
+ input: "checkpoint_initializer_28"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_29/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_29/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_29/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_29"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_29/prefix"
+ input: "checkpoint_initializer_29/tensor_names"
+ input: "checkpoint_initializer_29/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_29"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/bias"
+ input: "checkpoint_initializer_29"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_30/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_30/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_30/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_30"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_30/prefix"
+ input: "checkpoint_initializer_30/tensor_names"
+ input: "checkpoint_initializer_30/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_30"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/kernel"
+ input: "checkpoint_initializer_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_31/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_31/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_31/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_31"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_31/prefix"
+ input: "checkpoint_initializer_31/tensor_names"
+ input: "checkpoint_initializer_31/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_31"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/bias"
+ input: "checkpoint_initializer_31"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_32/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_32/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_32/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_32"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_32/prefix"
+ input: "checkpoint_initializer_32/tensor_names"
+ input: "checkpoint_initializer_32/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_32"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel"
+ input: "checkpoint_initializer_32"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_33/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_33/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_33/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_33"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_33/prefix"
+ input: "checkpoint_initializer_33/tensor_names"
+ input: "checkpoint_initializer_33/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_33"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta"
+ input: "checkpoint_initializer_33"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_34/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_34/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_34/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_34"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_34/prefix"
+ input: "checkpoint_initializer_34/tensor_names"
+ input: "checkpoint_initializer_34/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_34"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_34"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_35/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_35/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_35/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_35"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_35/prefix"
+ input: "checkpoint_initializer_35/tensor_names"
+ input: "checkpoint_initializer_35/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_35"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/bias"
+ input: "checkpoint_initializer_35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_36/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_36/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_36/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_36"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_36/prefix"
+ input: "checkpoint_initializer_36/tensor_names"
+ input: "checkpoint_initializer_36/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_36"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/kernel"
+ input: "checkpoint_initializer_36"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_37/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_37/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_37/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_37"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_37/prefix"
+ input: "checkpoint_initializer_37/tensor_names"
+ input: "checkpoint_initializer_37/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_37"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_38/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_38/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_38/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_38"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_38/prefix"
+ input: "checkpoint_initializer_38/tensor_names"
+ input: "checkpoint_initializer_38/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_38"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_38"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_39/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_39/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_39/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_39"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_39/prefix"
+ input: "checkpoint_initializer_39/tensor_names"
+ input: "checkpoint_initializer_39/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_39"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/bias"
+ input: "checkpoint_initializer_39"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_40/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_40/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_40/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_40"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_40/prefix"
+ input: "checkpoint_initializer_40/tensor_names"
+ input: "checkpoint_initializer_40/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_40"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel"
+ input: "checkpoint_initializer_40"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_41/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_41/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_41/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_41"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_41/prefix"
+ input: "checkpoint_initializer_41/tensor_names"
+ input: "checkpoint_initializer_41/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_41"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/bias"
+ input: "checkpoint_initializer_41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_42/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_42/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_42/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_42"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_42/prefix"
+ input: "checkpoint_initializer_42/tensor_names"
+ input: "checkpoint_initializer_42/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_42"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/kernel"
+ input: "checkpoint_initializer_42"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_43/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_43/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_43/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_43"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_43/prefix"
+ input: "checkpoint_initializer_43/tensor_names"
+ input: "checkpoint_initializer_43/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_43"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/bias"
+ input: "checkpoint_initializer_43"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_44/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_44/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_44/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_44"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_44/prefix"
+ input: "checkpoint_initializer_44/tensor_names"
+ input: "checkpoint_initializer_44/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_44"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/kernel"
+ input: "checkpoint_initializer_44"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_45/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_45/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_45/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_45"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_45/prefix"
+ input: "checkpoint_initializer_45/tensor_names"
+ input: "checkpoint_initializer_45/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_45"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/bias"
+ input: "checkpoint_initializer_45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_46/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_46/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_46/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_46"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_46/prefix"
+ input: "checkpoint_initializer_46/tensor_names"
+ input: "checkpoint_initializer_46/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_46"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/kernel"
+ input: "checkpoint_initializer_46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_47/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_47/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_47/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_47"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_47/prefix"
+ input: "checkpoint_initializer_47/tensor_names"
+ input: "checkpoint_initializer_47/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_47"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/bias"
+ input: "checkpoint_initializer_47"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_48/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_48/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_48/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_48"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_48/prefix"
+ input: "checkpoint_initializer_48/tensor_names"
+ input: "checkpoint_initializer_48/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_48"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel"
+ input: "checkpoint_initializer_48"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_49/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_49/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_49/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_49"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_49/prefix"
+ input: "checkpoint_initializer_49/tensor_names"
+ input: "checkpoint_initializer_49/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_49"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta"
+ input: "checkpoint_initializer_49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_50/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_50/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_50/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_50"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_50/prefix"
+ input: "checkpoint_initializer_50/tensor_names"
+ input: "checkpoint_initializer_50/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_50"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_50"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_51/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_51/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_51/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_51"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_51/prefix"
+ input: "checkpoint_initializer_51/tensor_names"
+ input: "checkpoint_initializer_51/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_51"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/bias"
+ input: "checkpoint_initializer_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_52/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_52/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_52/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_52"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_52/prefix"
+ input: "checkpoint_initializer_52/tensor_names"
+ input: "checkpoint_initializer_52/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_52"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/kernel"
+ input: "checkpoint_initializer_52"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_53/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_53/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_53/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_53"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_53/prefix"
+ input: "checkpoint_initializer_53/tensor_names"
+ input: "checkpoint_initializer_53/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_53"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_53"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_54/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_54/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_54/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_54"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_54/prefix"
+ input: "checkpoint_initializer_54/tensor_names"
+ input: "checkpoint_initializer_54/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_54"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_54"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_55/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_55/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_55/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_55"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_55/prefix"
+ input: "checkpoint_initializer_55/tensor_names"
+ input: "checkpoint_initializer_55/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_55"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/bias"
+ input: "checkpoint_initializer_55"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_56/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_56/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_56/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_56"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_56/prefix"
+ input: "checkpoint_initializer_56/tensor_names"
+ input: "checkpoint_initializer_56/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_56"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel"
+ input: "checkpoint_initializer_56"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_57/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_57/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_57/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_57"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_57/prefix"
+ input: "checkpoint_initializer_57/tensor_names"
+ input: "checkpoint_initializer_57/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_57"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/bias"
+ input: "checkpoint_initializer_57"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_58/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_58/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_58/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_58"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_58/prefix"
+ input: "checkpoint_initializer_58/tensor_names"
+ input: "checkpoint_initializer_58/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_58"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/kernel"
+ input: "checkpoint_initializer_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_59/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_59/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_59/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_59"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_59/prefix"
+ input: "checkpoint_initializer_59/tensor_names"
+ input: "checkpoint_initializer_59/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_59"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/bias"
+ input: "checkpoint_initializer_59"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_60/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_60/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_60/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_60"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_60/prefix"
+ input: "checkpoint_initializer_60/tensor_names"
+ input: "checkpoint_initializer_60/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_60"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/kernel"
+ input: "checkpoint_initializer_60"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_61/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_61/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_61/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_61"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_61/prefix"
+ input: "checkpoint_initializer_61/tensor_names"
+ input: "checkpoint_initializer_61/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_61"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/bias"
+ input: "checkpoint_initializer_61"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_62/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_62/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_62/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_62"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_62/prefix"
+ input: "checkpoint_initializer_62/tensor_names"
+ input: "checkpoint_initializer_62/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_62"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/kernel"
+ input: "checkpoint_initializer_62"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_63/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_63/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_63/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_63"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_63/prefix"
+ input: "checkpoint_initializer_63/tensor_names"
+ input: "checkpoint_initializer_63/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_63"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/bias"
+ input: "checkpoint_initializer_63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_64/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_64/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_64/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_64"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_64/prefix"
+ input: "checkpoint_initializer_64/tensor_names"
+ input: "checkpoint_initializer_64/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_64"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel"
+ input: "checkpoint_initializer_64"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_65/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_65/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_65/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_65"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_65/prefix"
+ input: "checkpoint_initializer_65/tensor_names"
+ input: "checkpoint_initializer_65/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_65"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta"
+ input: "checkpoint_initializer_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_66/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_66/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_66/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_66"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_66/prefix"
+ input: "checkpoint_initializer_66/tensor_names"
+ input: "checkpoint_initializer_66/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_66"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_66"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_67/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_67/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_67/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_67"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_67/prefix"
+ input: "checkpoint_initializer_67/tensor_names"
+ input: "checkpoint_initializer_67/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_67"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/bias"
+ input: "checkpoint_initializer_67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_68/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_68/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_68/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_68"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_68/prefix"
+ input: "checkpoint_initializer_68/tensor_names"
+ input: "checkpoint_initializer_68/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_68"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/kernel"
+ input: "checkpoint_initializer_68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_69/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_69/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_69/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_69"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_69/prefix"
+ input: "checkpoint_initializer_69/tensor_names"
+ input: "checkpoint_initializer_69/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_69"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_69"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_70/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_70/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_70/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_70"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_70/prefix"
+ input: "checkpoint_initializer_70/tensor_names"
+ input: "checkpoint_initializer_70/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_70"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_70"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_71/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_71/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_71/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_71"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_71/prefix"
+ input: "checkpoint_initializer_71/tensor_names"
+ input: "checkpoint_initializer_71/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_71"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/bias"
+ input: "checkpoint_initializer_71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_72/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_72/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_72/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_72"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_72/prefix"
+ input: "checkpoint_initializer_72/tensor_names"
+ input: "checkpoint_initializer_72/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_72"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel"
+ input: "checkpoint_initializer_72"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_73/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_73/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_73/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_73"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_73/prefix"
+ input: "checkpoint_initializer_73/tensor_names"
+ input: "checkpoint_initializer_73/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_73"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/bias"
+ input: "checkpoint_initializer_73"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_74/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_74/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_74/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_74"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_74/prefix"
+ input: "checkpoint_initializer_74/tensor_names"
+ input: "checkpoint_initializer_74/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_74"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/kernel"
+ input: "checkpoint_initializer_74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_75/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_75/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_75/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_75"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_75/prefix"
+ input: "checkpoint_initializer_75/tensor_names"
+ input: "checkpoint_initializer_75/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_75"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/bias"
+ input: "checkpoint_initializer_75"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_76/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_76/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_76/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_76"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_76/prefix"
+ input: "checkpoint_initializer_76/tensor_names"
+ input: "checkpoint_initializer_76/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_76"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/kernel"
+ input: "checkpoint_initializer_76"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_77/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_77/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_77/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_77"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_77/prefix"
+ input: "checkpoint_initializer_77/tensor_names"
+ input: "checkpoint_initializer_77/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_77"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/bias"
+ input: "checkpoint_initializer_77"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_78/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_78/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_78/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_78"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_78/prefix"
+ input: "checkpoint_initializer_78/tensor_names"
+ input: "checkpoint_initializer_78/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_78"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/kernel"
+ input: "checkpoint_initializer_78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_79/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_79/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_79/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_79"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_79/prefix"
+ input: "checkpoint_initializer_79/tensor_names"
+ input: "checkpoint_initializer_79/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_79"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/bias"
+ input: "checkpoint_initializer_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_80/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_80/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_80/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_80"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_80/prefix"
+ input: "checkpoint_initializer_80/tensor_names"
+ input: "checkpoint_initializer_80/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_80"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel"
+ input: "checkpoint_initializer_80"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_81/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_81/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_81/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_81"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_81/prefix"
+ input: "checkpoint_initializer_81/tensor_names"
+ input: "checkpoint_initializer_81/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_81"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta"
+ input: "checkpoint_initializer_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_82/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_82/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_82/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_82"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_82/prefix"
+ input: "checkpoint_initializer_82/tensor_names"
+ input: "checkpoint_initializer_82/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_82"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_82"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_83/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_83/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_83/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_83"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_83/prefix"
+ input: "checkpoint_initializer_83/tensor_names"
+ input: "checkpoint_initializer_83/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_83"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/bias"
+ input: "checkpoint_initializer_83"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_84/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_84/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_84/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_84"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_84/prefix"
+ input: "checkpoint_initializer_84/tensor_names"
+ input: "checkpoint_initializer_84/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_84"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/kernel"
+ input: "checkpoint_initializer_84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_85/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_85/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_85/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_85"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_85/prefix"
+ input: "checkpoint_initializer_85/tensor_names"
+ input: "checkpoint_initializer_85/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_85"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_85"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_86/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_86/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_86/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_86"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_86/prefix"
+ input: "checkpoint_initializer_86/tensor_names"
+ input: "checkpoint_initializer_86/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_86"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_86"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_87/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_87/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_87/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_87"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_87/prefix"
+ input: "checkpoint_initializer_87/tensor_names"
+ input: "checkpoint_initializer_87/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_87"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/bias"
+ input: "checkpoint_initializer_87"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_88/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_88/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_88/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_88"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_88/prefix"
+ input: "checkpoint_initializer_88/tensor_names"
+ input: "checkpoint_initializer_88/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_88"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel"
+ input: "checkpoint_initializer_88"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_89/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_89/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_89/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_89"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_89/prefix"
+ input: "checkpoint_initializer_89/tensor_names"
+ input: "checkpoint_initializer_89/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_89"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/bias"
+ input: "checkpoint_initializer_89"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_90/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_90/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_90/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_90"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_90/prefix"
+ input: "checkpoint_initializer_90/tensor_names"
+ input: "checkpoint_initializer_90/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_90"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/kernel"
+ input: "checkpoint_initializer_90"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_91/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_91/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_91/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_91"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_91/prefix"
+ input: "checkpoint_initializer_91/tensor_names"
+ input: "checkpoint_initializer_91/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_91"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/bias"
+ input: "checkpoint_initializer_91"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_92/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_92/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_92/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_92"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_92/prefix"
+ input: "checkpoint_initializer_92/tensor_names"
+ input: "checkpoint_initializer_92/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_92"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/kernel"
+ input: "checkpoint_initializer_92"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_93/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_93/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_93/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_93"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_93/prefix"
+ input: "checkpoint_initializer_93/tensor_names"
+ input: "checkpoint_initializer_93/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_93"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/bias"
+ input: "checkpoint_initializer_93"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_94/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_94/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_94/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_94"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_94/prefix"
+ input: "checkpoint_initializer_94/tensor_names"
+ input: "checkpoint_initializer_94/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_94"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/kernel"
+ input: "checkpoint_initializer_94"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_95/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_95/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_95/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_95"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_95/prefix"
+ input: "checkpoint_initializer_95/tensor_names"
+ input: "checkpoint_initializer_95/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_95"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/bias"
+ input: "checkpoint_initializer_95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_96/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_96/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_96/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_96"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_96/prefix"
+ input: "checkpoint_initializer_96/tensor_names"
+ input: "checkpoint_initializer_96/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_96"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel"
+ input: "checkpoint_initializer_96"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_97/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_97/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_97/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_97"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_97/prefix"
+ input: "checkpoint_initializer_97/tensor_names"
+ input: "checkpoint_initializer_97/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_97"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta"
+ input: "checkpoint_initializer_97"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_98/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_98/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_98/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_98"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_98/prefix"
+ input: "checkpoint_initializer_98/tensor_names"
+ input: "checkpoint_initializer_98/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_98"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_98"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_99/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_99/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_99/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_99"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_99/prefix"
+ input: "checkpoint_initializer_99/tensor_names"
+ input: "checkpoint_initializer_99/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_99"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/bias"
+ input: "checkpoint_initializer_99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_100/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_100/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_100/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_100"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_100/prefix"
+ input: "checkpoint_initializer_100/tensor_names"
+ input: "checkpoint_initializer_100/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_100"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/kernel"
+ input: "checkpoint_initializer_100"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_101/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_101/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_101/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_101"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_101/prefix"
+ input: "checkpoint_initializer_101/tensor_names"
+ input: "checkpoint_initializer_101/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_101"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_101"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_102/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_102/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_102/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_102"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_102/prefix"
+ input: "checkpoint_initializer_102/tensor_names"
+ input: "checkpoint_initializer_102/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_102"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_102"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_103/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_103/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_103/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_103"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_103/prefix"
+ input: "checkpoint_initializer_103/tensor_names"
+ input: "checkpoint_initializer_103/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_103"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/bias"
+ input: "checkpoint_initializer_103"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_104/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_104/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_104/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_104"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_104/prefix"
+ input: "checkpoint_initializer_104/tensor_names"
+ input: "checkpoint_initializer_104/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_104"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel"
+ input: "checkpoint_initializer_104"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_105/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_105/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_105/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_105"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_105/prefix"
+ input: "checkpoint_initializer_105/tensor_names"
+ input: "checkpoint_initializer_105/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_105"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/bias"
+ input: "checkpoint_initializer_105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_106/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_106/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_106/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_106"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_106/prefix"
+ input: "checkpoint_initializer_106/tensor_names"
+ input: "checkpoint_initializer_106/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_106"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/kernel"
+ input: "checkpoint_initializer_106"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_107/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_107/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_107/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_107"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_107/prefix"
+ input: "checkpoint_initializer_107/tensor_names"
+ input: "checkpoint_initializer_107/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_107"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/bias"
+ input: "checkpoint_initializer_107"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_108/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_108/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_108/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_108"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_108/prefix"
+ input: "checkpoint_initializer_108/tensor_names"
+ input: "checkpoint_initializer_108/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_108"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/kernel"
+ input: "checkpoint_initializer_108"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_109/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_109/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_109/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_109"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_109/prefix"
+ input: "checkpoint_initializer_109/tensor_names"
+ input: "checkpoint_initializer_109/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_109"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/bias"
+ input: "checkpoint_initializer_109"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_110/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_110/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_110/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_110"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_110/prefix"
+ input: "checkpoint_initializer_110/tensor_names"
+ input: "checkpoint_initializer_110/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_110"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/kernel"
+ input: "checkpoint_initializer_110"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_111/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_111/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_111/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_111"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_111/prefix"
+ input: "checkpoint_initializer_111/tensor_names"
+ input: "checkpoint_initializer_111/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_111"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/bias"
+ input: "checkpoint_initializer_111"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_112/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_112/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_112/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_112"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_112/prefix"
+ input: "checkpoint_initializer_112/tensor_names"
+ input: "checkpoint_initializer_112/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_112"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel"
+ input: "checkpoint_initializer_112"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_113/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_113/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_113/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_113"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_113/prefix"
+ input: "checkpoint_initializer_113/tensor_names"
+ input: "checkpoint_initializer_113/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_113"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta"
+ input: "checkpoint_initializer_113"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_114/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_114/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_114/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_114"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_114/prefix"
+ input: "checkpoint_initializer_114/tensor_names"
+ input: "checkpoint_initializer_114/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_114"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_114"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_115/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_115/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_115/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_115"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_115/prefix"
+ input: "checkpoint_initializer_115/tensor_names"
+ input: "checkpoint_initializer_115/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_115"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/bias"
+ input: "checkpoint_initializer_115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_116/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_116/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_116/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_116"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_116/prefix"
+ input: "checkpoint_initializer_116/tensor_names"
+ input: "checkpoint_initializer_116/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_116"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/kernel"
+ input: "checkpoint_initializer_116"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_117/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_117/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_117/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_117"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_117/prefix"
+ input: "checkpoint_initializer_117/tensor_names"
+ input: "checkpoint_initializer_117/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_117"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_117"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_118/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_118/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_118/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_118"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_118/prefix"
+ input: "checkpoint_initializer_118/tensor_names"
+ input: "checkpoint_initializer_118/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_118"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_118"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_119/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_119/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_119/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_119"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_119/prefix"
+ input: "checkpoint_initializer_119/tensor_names"
+ input: "checkpoint_initializer_119/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_119"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/bias"
+ input: "checkpoint_initializer_119"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_120/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_120/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_120/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_120"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_120/prefix"
+ input: "checkpoint_initializer_120/tensor_names"
+ input: "checkpoint_initializer_120/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_120"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel"
+ input: "checkpoint_initializer_120"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_121/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_121/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_121/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_121"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_121/prefix"
+ input: "checkpoint_initializer_121/tensor_names"
+ input: "checkpoint_initializer_121/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_121"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/bias"
+ input: "checkpoint_initializer_121"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_122/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_122/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_122/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_122"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_122/prefix"
+ input: "checkpoint_initializer_122/tensor_names"
+ input: "checkpoint_initializer_122/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_122"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/kernel"
+ input: "checkpoint_initializer_122"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_123/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_123/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_123/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_123"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_123/prefix"
+ input: "checkpoint_initializer_123/tensor_names"
+ input: "checkpoint_initializer_123/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_123"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/bias"
+ input: "checkpoint_initializer_123"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_124/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_124/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_124/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_124"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_124/prefix"
+ input: "checkpoint_initializer_124/tensor_names"
+ input: "checkpoint_initializer_124/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_124"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/kernel"
+ input: "checkpoint_initializer_124"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_125/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_125/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_125/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_125"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_125/prefix"
+ input: "checkpoint_initializer_125/tensor_names"
+ input: "checkpoint_initializer_125/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_125"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/bias"
+ input: "checkpoint_initializer_125"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_126/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_126/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_126/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_126"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_126/prefix"
+ input: "checkpoint_initializer_126/tensor_names"
+ input: "checkpoint_initializer_126/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_126"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/kernel"
+ input: "checkpoint_initializer_126"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_127/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_127/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_127/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_127"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_127/prefix"
+ input: "checkpoint_initializer_127/tensor_names"
+ input: "checkpoint_initializer_127/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_127"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/bias"
+ input: "checkpoint_initializer_127"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_128/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_128/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_128/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_128"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_128/prefix"
+ input: "checkpoint_initializer_128/tensor_names"
+ input: "checkpoint_initializer_128/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_128"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel"
+ input: "checkpoint_initializer_128"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_129/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_129/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_129/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_129"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_129/prefix"
+ input: "checkpoint_initializer_129/tensor_names"
+ input: "checkpoint_initializer_129/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_129"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta"
+ input: "checkpoint_initializer_129"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_130/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_130/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_130/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_130"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_130/prefix"
+ input: "checkpoint_initializer_130/tensor_names"
+ input: "checkpoint_initializer_130/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_130"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_130"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_131/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_131/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_131/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_131"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_131/prefix"
+ input: "checkpoint_initializer_131/tensor_names"
+ input: "checkpoint_initializer_131/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_131"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/bias"
+ input: "checkpoint_initializer_131"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_132/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_132/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_132/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_132"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_132/prefix"
+ input: "checkpoint_initializer_132/tensor_names"
+ input: "checkpoint_initializer_132/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_132"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/kernel"
+ input: "checkpoint_initializer_132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_133/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_133/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_133/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_133"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_133/prefix"
+ input: "checkpoint_initializer_133/tensor_names"
+ input: "checkpoint_initializer_133/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_133"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_133"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_134/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_134/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_134/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_134"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_134/prefix"
+ input: "checkpoint_initializer_134/tensor_names"
+ input: "checkpoint_initializer_134/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_134"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_134"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_135/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_135/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_135/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_135"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_135/prefix"
+ input: "checkpoint_initializer_135/tensor_names"
+ input: "checkpoint_initializer_135/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_135"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/bias"
+ input: "checkpoint_initializer_135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_136/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_136/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_136/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_136"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_136/prefix"
+ input: "checkpoint_initializer_136/tensor_names"
+ input: "checkpoint_initializer_136/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_136"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel"
+ input: "checkpoint_initializer_136"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_137/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_137/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_137/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_137"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_137/prefix"
+ input: "checkpoint_initializer_137/tensor_names"
+ input: "checkpoint_initializer_137/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_137"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/bias"
+ input: "checkpoint_initializer_137"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_138/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_138/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_138/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_138"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_138/prefix"
+ input: "checkpoint_initializer_138/tensor_names"
+ input: "checkpoint_initializer_138/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_138"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/kernel"
+ input: "checkpoint_initializer_138"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_139/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_139/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_139/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_139"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_139/prefix"
+ input: "checkpoint_initializer_139/tensor_names"
+ input: "checkpoint_initializer_139/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_139"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/bias"
+ input: "checkpoint_initializer_139"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_140/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_140/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_140/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_140"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_140/prefix"
+ input: "checkpoint_initializer_140/tensor_names"
+ input: "checkpoint_initializer_140/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_140"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/kernel"
+ input: "checkpoint_initializer_140"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_141/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_141/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_141/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_141"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_141/prefix"
+ input: "checkpoint_initializer_141/tensor_names"
+ input: "checkpoint_initializer_141/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_141"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/bias"
+ input: "checkpoint_initializer_141"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_142/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_142/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_142/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_142"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_142/prefix"
+ input: "checkpoint_initializer_142/tensor_names"
+ input: "checkpoint_initializer_142/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_142"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/kernel"
+ input: "checkpoint_initializer_142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_143/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_143/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_143/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_143"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_143/prefix"
+ input: "checkpoint_initializer_143/tensor_names"
+ input: "checkpoint_initializer_143/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_143"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/bias"
+ input: "checkpoint_initializer_143"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_144/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_144/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_144/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_144"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_144/prefix"
+ input: "checkpoint_initializer_144/tensor_names"
+ input: "checkpoint_initializer_144/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_144"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel"
+ input: "checkpoint_initializer_144"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_145/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_145/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_145/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_145"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_145/prefix"
+ input: "checkpoint_initializer_145/tensor_names"
+ input: "checkpoint_initializer_145/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_145"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta"
+ input: "checkpoint_initializer_145"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_146/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_146/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_146/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_146"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_146/prefix"
+ input: "checkpoint_initializer_146/tensor_names"
+ input: "checkpoint_initializer_146/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_146"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_146"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_147/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_147/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_147/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_147"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_147/prefix"
+ input: "checkpoint_initializer_147/tensor_names"
+ input: "checkpoint_initializer_147/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_147"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/bias"
+ input: "checkpoint_initializer_147"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_148/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_148/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_148/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_148"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_148/prefix"
+ input: "checkpoint_initializer_148/tensor_names"
+ input: "checkpoint_initializer_148/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_148"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/kernel"
+ input: "checkpoint_initializer_148"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_149/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_149/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_149/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_149"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_149/prefix"
+ input: "checkpoint_initializer_149/tensor_names"
+ input: "checkpoint_initializer_149/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_149"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_150/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_150/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_150/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_150"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_150/prefix"
+ input: "checkpoint_initializer_150/tensor_names"
+ input: "checkpoint_initializer_150/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_150"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_150"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_151/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_151/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_151/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_151"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_151/prefix"
+ input: "checkpoint_initializer_151/tensor_names"
+ input: "checkpoint_initializer_151/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_151"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/bias"
+ input: "checkpoint_initializer_151"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_152/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_152/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_152/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_152"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_152/prefix"
+ input: "checkpoint_initializer_152/tensor_names"
+ input: "checkpoint_initializer_152/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_152"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel"
+ input: "checkpoint_initializer_152"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_153/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_153/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_153/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_153"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_153/prefix"
+ input: "checkpoint_initializer_153/tensor_names"
+ input: "checkpoint_initializer_153/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_153"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/bias"
+ input: "checkpoint_initializer_153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_154/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_154/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_154/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_154"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_154/prefix"
+ input: "checkpoint_initializer_154/tensor_names"
+ input: "checkpoint_initializer_154/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_154"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/kernel"
+ input: "checkpoint_initializer_154"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_155/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_155/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_155/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_155"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_155/prefix"
+ input: "checkpoint_initializer_155/tensor_names"
+ input: "checkpoint_initializer_155/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_155"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/bias"
+ input: "checkpoint_initializer_155"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_156/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_156/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_156/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_156"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_156/prefix"
+ input: "checkpoint_initializer_156/tensor_names"
+ input: "checkpoint_initializer_156/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_156"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/kernel"
+ input: "checkpoint_initializer_156"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_157/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_157/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_157/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_157"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_157/prefix"
+ input: "checkpoint_initializer_157/tensor_names"
+ input: "checkpoint_initializer_157/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_157"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/bias"
+ input: "checkpoint_initializer_157"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_158/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_158/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_158/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_158"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_158/prefix"
+ input: "checkpoint_initializer_158/tensor_names"
+ input: "checkpoint_initializer_158/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_158"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/kernel"
+ input: "checkpoint_initializer_158"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_159/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_159/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_159/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_159"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_159/prefix"
+ input: "checkpoint_initializer_159/tensor_names"
+ input: "checkpoint_initializer_159/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_159"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/bias"
+ input: "checkpoint_initializer_159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_160/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_160/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_160/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_160"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_160/prefix"
+ input: "checkpoint_initializer_160/tensor_names"
+ input: "checkpoint_initializer_160/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_160"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel"
+ input: "checkpoint_initializer_160"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_161/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_161/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_161/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_161"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_161/prefix"
+ input: "checkpoint_initializer_161/tensor_names"
+ input: "checkpoint_initializer_161/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_161"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta"
+ input: "checkpoint_initializer_161"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_162/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_162/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_162/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_162"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_162/prefix"
+ input: "checkpoint_initializer_162/tensor_names"
+ input: "checkpoint_initializer_162/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_162"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_162"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_163/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_163/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_163/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_163"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_163/prefix"
+ input: "checkpoint_initializer_163/tensor_names"
+ input: "checkpoint_initializer_163/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_163"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/bias"
+ input: "checkpoint_initializer_163"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_164/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_164/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_164/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_164"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_164/prefix"
+ input: "checkpoint_initializer_164/tensor_names"
+ input: "checkpoint_initializer_164/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_164"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/kernel"
+ input: "checkpoint_initializer_164"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_165/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_165/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_165/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_165"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_165/prefix"
+ input: "checkpoint_initializer_165/tensor_names"
+ input: "checkpoint_initializer_165/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_165"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_165"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_166/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_166/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_166/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_166"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_166/prefix"
+ input: "checkpoint_initializer_166/tensor_names"
+ input: "checkpoint_initializer_166/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_166"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_166"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_167/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_167/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_167/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_167"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_167/prefix"
+ input: "checkpoint_initializer_167/tensor_names"
+ input: "checkpoint_initializer_167/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_167"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/bias"
+ input: "checkpoint_initializer_167"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_168/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_168/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_168/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_168"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_168/prefix"
+ input: "checkpoint_initializer_168/tensor_names"
+ input: "checkpoint_initializer_168/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_168"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel"
+ input: "checkpoint_initializer_168"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_169/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_169/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_169/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_169"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_169/prefix"
+ input: "checkpoint_initializer_169/tensor_names"
+ input: "checkpoint_initializer_169/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_169"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/bias"
+ input: "checkpoint_initializer_169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_170/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_170/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_170/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_170"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_170/prefix"
+ input: "checkpoint_initializer_170/tensor_names"
+ input: "checkpoint_initializer_170/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_170"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/kernel"
+ input: "checkpoint_initializer_170"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_171/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_171/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_171/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_171"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_171/prefix"
+ input: "checkpoint_initializer_171/tensor_names"
+ input: "checkpoint_initializer_171/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_171"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/bias"
+ input: "checkpoint_initializer_171"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_172/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_172/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_172/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_172"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_172/prefix"
+ input: "checkpoint_initializer_172/tensor_names"
+ input: "checkpoint_initializer_172/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_172"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/kernel"
+ input: "checkpoint_initializer_172"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_173/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_173/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_173/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_173"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_173/prefix"
+ input: "checkpoint_initializer_173/tensor_names"
+ input: "checkpoint_initializer_173/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_173"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/bias"
+ input: "checkpoint_initializer_173"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_174/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_174/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_174/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_174"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_174/prefix"
+ input: "checkpoint_initializer_174/tensor_names"
+ input: "checkpoint_initializer_174/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_174"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/kernel"
+ input: "checkpoint_initializer_174"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_175/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_175/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_175/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_175"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_175/prefix"
+ input: "checkpoint_initializer_175/tensor_names"
+ input: "checkpoint_initializer_175/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_175"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/bias"
+ input: "checkpoint_initializer_175"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_176/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_176/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_176/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_176"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_176/prefix"
+ input: "checkpoint_initializer_176/tensor_names"
+ input: "checkpoint_initializer_176/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_176"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel"
+ input: "checkpoint_initializer_176"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_177/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_177/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_177/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_177"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_177/prefix"
+ input: "checkpoint_initializer_177/tensor_names"
+ input: "checkpoint_initializer_177/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_177"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta"
+ input: "checkpoint_initializer_177"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_178/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_178/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_178/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_178"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_178/prefix"
+ input: "checkpoint_initializer_178/tensor_names"
+ input: "checkpoint_initializer_178/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_178"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_178"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_179/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_179/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_179/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_179"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_179/prefix"
+ input: "checkpoint_initializer_179/tensor_names"
+ input: "checkpoint_initializer_179/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_179"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/bias"
+ input: "checkpoint_initializer_179"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_180/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_180/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_180/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_180"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_180/prefix"
+ input: "checkpoint_initializer_180/tensor_names"
+ input: "checkpoint_initializer_180/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_180"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/kernel"
+ input: "checkpoint_initializer_180"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_181/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_181/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_181/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_181"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_181/prefix"
+ input: "checkpoint_initializer_181/tensor_names"
+ input: "checkpoint_initializer_181/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_181"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ input: "checkpoint_initializer_181"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_182/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_182/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_182/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_182"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_182/prefix"
+ input: "checkpoint_initializer_182/tensor_names"
+ input: "checkpoint_initializer_182/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_182"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_182"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_183/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_183/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_183/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_183"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_183/prefix"
+ input: "checkpoint_initializer_183/tensor_names"
+ input: "checkpoint_initializer_183/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_183"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/bias"
+ input: "checkpoint_initializer_183"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_184/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_184/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_184/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_184"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_184/prefix"
+ input: "checkpoint_initializer_184/tensor_names"
+ input: "checkpoint_initializer_184/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_184"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel"
+ input: "checkpoint_initializer_184"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_185/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_185/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/attention/self/key/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_185/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_185"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_185/prefix"
+ input: "checkpoint_initializer_185/tensor_names"
+ input: "checkpoint_initializer_185/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_185"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/bias"
+ input: "checkpoint_initializer_185"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_186/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_186/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_186/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_186"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_186/prefix"
+ input: "checkpoint_initializer_186/tensor_names"
+ input: "checkpoint_initializer_186/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_186"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/kernel"
+ input: "checkpoint_initializer_186"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_187/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_187/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/attention/self/query/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_187/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_187"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_187/prefix"
+ input: "checkpoint_initializer_187/tensor_names"
+ input: "checkpoint_initializer_187/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_187"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/bias"
+ input: "checkpoint_initializer_187"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_188/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_188/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_188/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_188"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_188/prefix"
+ input: "checkpoint_initializer_188/tensor_names"
+ input: "checkpoint_initializer_188/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_188"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/kernel"
+ input: "checkpoint_initializer_188"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_189/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_189/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/attention/self/value/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_189/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_189"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_189/prefix"
+ input: "checkpoint_initializer_189/tensor_names"
+ input: "checkpoint_initializer_189/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_189"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/bias"
+ input: "checkpoint_initializer_189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_190/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_190/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_190/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_190"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_190/prefix"
+ input: "checkpoint_initializer_190/tensor_names"
+ input: "checkpoint_initializer_190/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_190"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/kernel"
+ input: "checkpoint_initializer_190"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_191/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_191/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_191/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_191"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_191/prefix"
+ input: "checkpoint_initializer_191/tensor_names"
+ input: "checkpoint_initializer_191/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_191"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/bias"
+ input: "checkpoint_initializer_191"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_192/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_192/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_192/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_192"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_192/prefix"
+ input: "checkpoint_initializer_192/tensor_names"
+ input: "checkpoint_initializer_192/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_192"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel"
+ input: "checkpoint_initializer_192"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_193/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_193/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_193/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_193"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_193/prefix"
+ input: "checkpoint_initializer_193/tensor_names"
+ input: "checkpoint_initializer_193/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_193"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta"
+ input: "checkpoint_initializer_193"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_194/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_194/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_194/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_194"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_194/prefix"
+ input: "checkpoint_initializer_194/tensor_names"
+ input: "checkpoint_initializer_194/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_194"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ input: "checkpoint_initializer_194"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_195/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_195/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/output/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_195/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_195"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_195/prefix"
+ input: "checkpoint_initializer_195/tensor_names"
+ input: "checkpoint_initializer_195/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_195"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/bias"
+ input: "checkpoint_initializer_195"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_196/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_196/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_196/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_196"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_196/prefix"
+ input: "checkpoint_initializer_196/tensor_names"
+ input: "checkpoint_initializer_196/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_196"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/kernel"
+ input: "checkpoint_initializer_196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_197/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_197/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/pooler/dense/bias"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_197/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_197"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_197/prefix"
+ input: "checkpoint_initializer_197/tensor_names"
+ input: "checkpoint_initializer_197/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_197"
+ op: "Assign"
+ input: "bert/pooler/dense/bias"
+ input: "checkpoint_initializer_197"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_198/prefix"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_198/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: "bert/pooler/dense/kernel"
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_198/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "checkpoint_initializer_198"
+ op: "RestoreV2"
+ input: "checkpoint_initializer_198/prefix"
+ input: "checkpoint_initializer_198/tensor_names"
+ input: "checkpoint_initializer_198/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "Assign_198"
+ op: "Assign"
+ input: "bert/pooler/dense/kernel"
+ input: "checkpoint_initializer_198"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
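+# The checkpoint_initializer_*/Assign_* groups above (ending here with the
+# pooler kernel) restore pretrained weights: each RestoreV2 reads one named
+# tensor from ./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt and the paired
+# Assign writes it into the corresponding bert/* variable. This appears to be
+# the serialized trace of init_from_checkpoint-style warm starting; that
+# reading is inferred from the node pattern, not stated in the file itself.
+# The nodes that follow build the learning-rate schedule.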
+node {
+ name: "Const_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.9999999494757503e-05
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/Cast/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/Cast_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/Cast_2/ReadVariableOp"
+ op: "ReadVariableOp"
+ input: "global_step"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/Cast_2"
+ op: "Cast"
+ input: "PolynomialDecay/Cast_2/ReadVariableOp"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/Cast_3/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 14062
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/Cast_3"
+ op: "Cast"
+ input: "PolynomialDecay/Cast_3/x"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/Minimum/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 14062.0
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/Minimum"
+ op: "Minimum"
+ input: "PolynomialDecay/Cast_2"
+ input: "PolynomialDecay/Minimum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/div"
+ op: "RealDiv"
+ input: "PolynomialDecay/Minimum"
+ input: "PolynomialDecay/Cast_3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/sub"
+ op: "Sub"
+ input: "Const_1"
+ input: "PolynomialDecay/Cast/x"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/sub_1/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/sub_1"
+ op: "Sub"
+ input: "PolynomialDecay/sub_1/x"
+ input: "PolynomialDecay/div"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/Pow"
+ op: "Pow"
+ input: "PolynomialDecay/sub_1"
+ input: "PolynomialDecay/Cast_1/x"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay/Mul"
+ op: "Mul"
+ input: "PolynomialDecay/sub"
+ input: "PolynomialDecay/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "PolynomialDecay"
+ op: "Add"
+ input: "PolynomialDecay/Mul"
+ input: "PolynomialDecay/Cast/x"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Cast_1/ReadVariableOp"
+ op: "ReadVariableOp"
+ input: "global_step"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+}
+node {
+ name: "Cast_1"
+ op: "Cast"
+ input: "Cast_1/ReadVariableOp"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Const_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1406
+ }
+ }
+ }
+}
+node {
+ name: "Cast_2"
+ op: "Cast"
+ input: "Cast_1"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Cast_3"
+ op: "Cast"
+ input: "Const_2"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv"
+ op: "RealDiv"
+ input: "Cast_2"
+ input: "Cast_3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.9999999494757503e-05
+ }
+ }
+ }
+}
+node {
+ name: "mul"
+ op: "Mul"
+ input: "mul/x"
+ input: "truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Less"
+ op: "Less"
+ input: "Cast_1"
+ input: "Const_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Cast_4"
+ op: "Cast"
+ input: "Less"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "sub"
+ op: "Sub"
+ input: "sub/x"
+ input: "Cast_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1"
+ op: "Mul"
+ input: "sub"
+ input: "PolynomialDecay"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_2"
+ op: "Mul"
+ input: "Cast_4"
+ input: "mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add"
+ op: "Add"
+ input: "mul_1"
+ input: "mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
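+# The nodes from Const_1 through "add" above compute the effective learning
+# rate: PolynomialDecay takes the base rate (~2e-5) to 0.0 over 14062 steps
+# with power 1.0, while mul scales the base rate by global_step/1406 and the
+# Less/Cast_4 mask selects that linear ramp for the first 1406 steps. This
+# matches the warmup-then-linear-decay schedule commonly used for BERT
+# fine-tuning, though that interpretation is inferred from the constants.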
+node {
+ name: "gradients/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/grad_ys_0"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/Fill"
+ op: "Fill"
+ input: "gradients/Shape"
+ input: "gradients/grad_ys_0"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
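+# Backpropagation starts here: gradients/Fill seeds the scalar loss with a
+# unit gradient, and the gradients/loss/* nodes that follow invert the forward
+# loss pipeline (Mean -> Neg -> Sum -> mul -> LogSoftmax -> classifier
+# BiasAdd/MatMul -> dropout), with shapes indicating a batch of 32 examples
+# and 3 output classes.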
+node {
+ name: "gradients/loss/Mean_grad/Reshape/shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/Fill"
+ input: "gradients/loss/Mean_grad/Reshape/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 32
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/loss/Mean_grad/Reshape"
+ input: "gradients/loss/Mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Mean_grad/Const_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 32.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/loss/Mean_grad/Tile"
+ input: "gradients/loss/Mean_grad/Const_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Neg_grad/Neg"
+ op: "Neg"
+ input: "gradients/loss/Mean_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: " \000\000\000\003\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/add"
+ op: "Add"
+ input: "loss/Sum/reduction_indices"
+ input: "gradients/loss/Sum_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/mod"
+ op: "FloorMod"
+ input: "gradients/loss/Sum_grad/add"
+ input: "gradients/loss/Sum_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/range"
+ op: "Range"
+ input: "gradients/loss/Sum_grad/range/start"
+ input: "gradients/loss/Sum_grad/Size"
+ input: "gradients/loss/Sum_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/Fill"
+ op: "Fill"
+ input: "gradients/loss/Sum_grad/Shape_1"
+ input: "gradients/loss/Sum_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/loss/Sum_grad/range"
+ input: "gradients/loss/Sum_grad/mod"
+ input: "gradients/loss/Sum_grad/Shape"
+ input: "gradients/loss/Sum_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/loss/Sum_grad/DynamicStitch"
+ input: "gradients/loss/Sum_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/loss/Sum_grad/Shape"
+ input: "gradients/loss/Sum_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/Sum_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/loss/Neg_grad/Neg"
+ input: "gradients/loss/Sum_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/Sum_grad/Tile"
+ op: "Tile"
+ input: "gradients/loss/Sum_grad/Reshape"
+ input: "gradients/loss/Sum_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/loss/Sum_grad/Tile"
+ input: "loss/LogSoftmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/mul_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/loss/Sum_grad/Tile"
+ input: "loss/one_hot"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/LogSoftmax_grad/Exp"
+ op: "Exp"
+ input: "loss/LogSoftmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/LogSoftmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/LogSoftmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/loss/mul_grad/Mul_1"
+ input: "gradients/loss/LogSoftmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/loss/LogSoftmax_grad/mul"
+ op: "Mul"
+ input: "gradients/loss/LogSoftmax_grad/Sum"
+ input: "gradients/loss/LogSoftmax_grad/Exp"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/LogSoftmax_grad/sub"
+ op: "Sub"
+ input: "gradients/loss/mul_grad/Mul_1"
+ input: "gradients/loss/LogSoftmax_grad/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/loss/LogSoftmax_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/loss/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/loss/LogSoftmax_grad/sub"
+ input: "output_weights/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/loss/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "gradients/loss/LogSoftmax_grad/sub"
+ input: "loss/dropout/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/loss/MatMul_grad/MatMul"
+ input: "loss/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/loss/MatMul_grad/MatMul"
+ input: "loss/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: " \000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/loss/dropout/mul_grad/Shape"
+ input: "gradients/loss/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/loss/dropout/mul_1_grad/Mul"
+ input: "loss/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/loss/dropout/mul_grad/Mul"
+ input: "gradients/loss/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/loss/dropout/mul_grad/Sum"
+ input: "gradients/loss/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/pooler/dense/Tanh"
+ input: "gradients/loss/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/loss/dropout/mul_grad/Mul_1"
+ input: "gradients/loss/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/loss/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/loss/dropout/mul_grad/Sum_1"
+ input: "gradients/loss/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/pooler/dense/Tanh"
+ input: "gradients/loss/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/pooler/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad"
+ input: "bert/pooler/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/pooler/Squeeze"
+ input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/pooler/Squeeze_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/pooler/Squeeze_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/pooler/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/pooler/Squeeze_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/pooler/strided_slice_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/pooler/strided_slice_grad/StridedSliceGrad"
+ op: "StridedSliceGrad"
+ input: "gradients/bert/pooler/strided_slice_grad/Shape"
+ input: "bert/pooler/strided_slice/stack"
+ input: "bert/pooler/strided_slice/stack_1"
+ input: "bert/pooler/strided_slice/stack_2"
+ input: "gradients/bert/pooler/Squeeze_grad/Reshape"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 5
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 5
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
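+# From here the gradient re-enters the encoder: StridedSliceGrad scatters the
+# pooler gradient (taken from a single sequence position, presumably the
+# [CLS] token) back into the full [32, 128, 768] sequence output, and
+# Reshape_13_grad flattens it to [4096, 768] ahead of the layer_11 output
+# LayerNorm gradient nodes below.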
+node {
+ name: "gradients/bert/encoder/Reshape_13_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/Reshape_13_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/pooler/strided_slice_grad/StridedSliceGrad"
+ input: "gradients/bert/encoder/Reshape_13_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/Reshape_13_grad/Reshape"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/Reshape_13_grad/Reshape"
+ input: "bert/encoder/layer_11/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/Reshape_13_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/Reshape_13_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/output/add"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_11/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_1"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_1"
+ input: "bert/encoder/layer_11/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_1"
+ input: "bert/encoder/layer_11/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_11/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_11/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_11/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_11/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_11/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_11/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_11/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_11/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_11/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_11/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_11/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_11/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_11/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_11/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_2"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_2"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_3"
+ op: "AddN"
+ input: "gradients/AddN_1"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_3"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_3"
+ input: "bert/encoder/layer_11/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_3"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_3"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_4"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_4"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/output/add"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_5"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_5"
+ input: "bert/encoder/layer_11/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_5"
+ input: "bert/encoder/layer_11/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_11/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_11/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_11/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_11/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_11/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_11/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_11/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_11/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_11/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_11/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_11/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_11/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_11/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_11/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_11/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_11/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_11/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_6"
+ op: "AddN"
+ input: "gradients/AddN_5"
+ input: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_6"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_6"
+ input: "bert/encoder/layer_10/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_6"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_6"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_7"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_7"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/output/add"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_10/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_8"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_8"
+ input: "bert/encoder/layer_10/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_8"
+ input: "bert/encoder/layer_10/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_10/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_10/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_10/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_10/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_10/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_10/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_10/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_10/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_10/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_10/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_10/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_10/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_10/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_9"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_9"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_10"
+ op: "AddN"
+ input: "gradients/AddN_8"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_10"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_10"
+ input: "bert/encoder/layer_10/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_10"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_10"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_11"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_11"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_11"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/output/add"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_12"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_12"
+ input: "bert/encoder/layer_10/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_12"
+ input: "bert/encoder/layer_10/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_10/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_10/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_10/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_10/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_10/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_10/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_10/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_10/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_10/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_10/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_10/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_10/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_10/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_10/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_10/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_10/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_10/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_13"
+ op: "AddN"
+ input: "gradients/AddN_12"
+ input: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_13"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_13"
+ input: "bert/encoder/layer_9/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_13"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_13"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_14"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_14"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/output/add"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_9/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_15"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_15"
+ input: "bert/encoder/layer_9/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_15"
+ input: "bert/encoder/layer_9/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_9/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_9/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_9/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_9/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_9/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_9/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_9/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_9/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_9/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_9/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_9/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_9/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_9/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_16"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_16"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_17"
+ op: "AddN"
+ input: "gradients/AddN_15"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_17"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_17"
+ input: "bert/encoder/layer_9/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_17"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_17"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_18"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_18"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_18"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/output/add"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_19"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_19"
+ input: "bert/encoder/layer_9/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_19"
+ input: "bert/encoder/layer_9/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_9/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_9/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_9/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_9/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_9/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_9/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_9/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_9/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_9/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_9/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_9/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_9/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_9/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_9/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_9/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_9/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_9/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_20"
+ op: "AddN"
+ input: "gradients/AddN_19"
+ input: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_20"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_20"
+ input: "bert/encoder/layer_8/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_20"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_20"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_21"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_21"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_21"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/output/add"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_8/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_22"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_22"
+ input: "bert/encoder/layer_8/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_22"
+ input: "bert/encoder/layer_8/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_8/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_8/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_8/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_8/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_8/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_8/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_8/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_8/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_8/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_8/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_8/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_8/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_8/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_23"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_23"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_24"
+ op: "AddN"
+ input: "gradients/AddN_22"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_24"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_24"
+ input: "bert/encoder/layer_8/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_24"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_24"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_25"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_25"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/output/add"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_26"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_26"
+ input: "bert/encoder/layer_8/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_26"
+ input: "bert/encoder/layer_8/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_8/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_8/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_8/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_8/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_8/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_8/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_8/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_8/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_8/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_8/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_8/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_8/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_8/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_8/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_8/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_8/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_8/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_27"
+ op: "AddN"
+ input: "gradients/AddN_26"
+ input: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_27"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_27"
+ input: "bert/encoder/layer_7/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_27"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_27"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_28"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_28"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_28"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/output/add"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_7/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_29"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_29"
+ input: "bert/encoder/layer_7/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_29"
+ input: "bert/encoder/layer_7/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_7/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_7/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_7/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_7/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_7/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_7/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_7/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_7/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_7/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_7/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_7/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_7/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_7/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_30"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_30"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_31"
+ op: "AddN"
+ input: "gradients/AddN_29"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_31"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_31"
+ input: "bert/encoder/layer_7/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_31"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_31"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_32"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_32"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_32"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/output/add"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_33"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_33"
+ input: "bert/encoder/layer_7/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_33"
+ input: "bert/encoder/layer_7/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_7/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_7/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_7/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_7/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_7/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_7/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_7/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_7/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_7/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_7/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_7/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_7/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_7/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_7/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_7/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_7/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_7/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_34"
+ op: "AddN"
+ input: "gradients/AddN_33"
+ input: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_34"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_34"
+ input: "bert/encoder/layer_6/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_34"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_34"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_35"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_35"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/output/add"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_6/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_36"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_36"
+ input: "bert/encoder/layer_6/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_36"
+ input: "bert/encoder/layer_6/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_6/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_6/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_6/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_6/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_6/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_6/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_6/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_6/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_6/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_6/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_6/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_6/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_6/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_37"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_37"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_38"
+ op: "AddN"
+ input: "gradients/AddN_36"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_38"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_38"
+ input: "bert/encoder/layer_6/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_38"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_38"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_39"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_39"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_39"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/output/add"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_40"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_40"
+ input: "bert/encoder/layer_6/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_40"
+ input: "bert/encoder/layer_6/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_6/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_6/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_6/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_6/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_6/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_6/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_6/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_6/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_6/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_6/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_6/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_6/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_6/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_6/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_6/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_6/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_6/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_41"
+ op: "AddN"
+ input: "gradients/AddN_40"
+ input: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_41"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_41"
+ input: "bert/encoder/layer_5/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_41"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_41"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_42"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_42"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_42"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/output/add"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_5/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_43"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_43"
+ input: "bert/encoder/layer_5/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_43"
+ input: "bert/encoder/layer_5/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_5/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_5/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_5/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_5/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_5/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_5/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_5/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_5/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_5/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_5/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_5/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_5/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_5/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_44"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_44"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_44"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_44"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_45"
+ op: "AddN"
+ input: "gradients/AddN_43"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_45"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_45"
+ input: "bert/encoder/layer_5/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_45"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_45"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_46"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_46"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/output/add"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_47"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_47"
+ input: "bert/encoder/layer_5/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_47"
+ input: "bert/encoder/layer_5/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_5/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_5/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_5/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_5/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_5/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_5/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_5/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_5/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_5/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_5/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_5/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_5/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_5/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_5/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_5/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_5/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_5/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_48"
+ op: "AddN"
+ input: "gradients/AddN_47"
+ input: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_48"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_48"
+ input: "bert/encoder/layer_4/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_48"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_48"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_49"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_49"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/output/add"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_4/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_50"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_50"
+ input: "bert/encoder/layer_4/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_50"
+ input: "bert/encoder/layer_4/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_4/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_4/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_4/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_4/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_4/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_4/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_4/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_4/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_4/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_4/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_4/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_4/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_4/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_51"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_51"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_52"
+ op: "AddN"
+ input: "gradients/AddN_50"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_52"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_52"
+ input: "bert/encoder/layer_4/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_52"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_52"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_53"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_53"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_53"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/output/add"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_54"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_54"
+ input: "bert/encoder/layer_4/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_54"
+ input: "bert/encoder/layer_4/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_4/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_4/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_4/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_4/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_4/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_4/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_4/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_4/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_4/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_4/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_4/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_4/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_4/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_4/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_4/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_4/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_4/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_55"
+ op: "AddN"
+ input: "gradients/AddN_54"
+ input: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_55"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_55"
+ input: "bert/encoder/layer_3/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_55"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_55"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_56"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_56"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_56"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/output/add"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_3/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_57"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_57"
+ input: "bert/encoder/layer_3/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_57"
+ input: "bert/encoder/layer_3/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_3/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_3/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_3/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_3/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_3/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_3/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_3/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_3/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_3/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_3/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_3/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_3/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_3/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_58"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_58"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_59"
+ op: "AddN"
+ input: "gradients/AddN_57"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_59"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_59"
+ input: "bert/encoder/layer_3/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_59"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_59"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_60"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_60"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_60"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/output/add"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_61"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_61"
+ input: "bert/encoder/layer_3/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_61"
+ input: "bert/encoder/layer_3/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_3/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_3/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_3/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_3/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_3/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_3/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_3/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_3/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_3/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_3/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_3/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_3/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_3/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_3/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_3/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_3/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_3/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_62"
+ op: "AddN"
+ input: "gradients/AddN_61"
+ input: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_62"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_62"
+ input: "bert/encoder/layer_2/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_62"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_62"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_63"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_63"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/output/add"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_2/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_64"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_64"
+ input: "bert/encoder/layer_2/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_64"
+ input: "bert/encoder/layer_2/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_2/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_2/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_2/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_2/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_2/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_2/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_2/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_2/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_2/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_2/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_2/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_2/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_2/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_65"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_65"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_66"
+ op: "AddN"
+ input: "gradients/AddN_64"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_66"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_66"
+ input: "bert/encoder/layer_2/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_66"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_66"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_67"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_67"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/output/add"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_68"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_68"
+ input: "bert/encoder/layer_2/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_68"
+ input: "bert/encoder/layer_2/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_2/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_2/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_2/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_2/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_2/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_2/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_2/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_2/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_2/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_2/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_2/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_2/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_2/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_2/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_2/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_2/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_2/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_69"
+ op: "AddN"
+ input: "gradients/AddN_68"
+ input: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_69"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_69"
+ input: "bert/encoder/layer_1/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_69"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_69"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_70"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_70"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_70"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/output/add"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_1/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_71"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_71"
+ input: "bert/encoder/layer_1/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_71"
+ input: "bert/encoder/layer_1/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_1/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_1/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_1/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_1/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_1/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_1/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_1/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_1/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_1/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_1/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_1/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_1/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_1/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_72"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_72"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_72"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_72"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_73"
+ op: "AddN"
+ input: "gradients/AddN_71"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_73"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_73"
+ input: "bert/encoder/layer_1/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_73"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_73"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_74"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_74"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/output/add"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_75"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_75"
+ input: "bert/encoder/layer_1/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_75"
+ input: "bert/encoder/layer_1/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_1/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_1/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_1/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_1/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_1/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_1/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_1/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_1/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_1/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_1/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_1/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_1/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_1/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_1/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_1/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_1/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_1/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_76"
+ op: "AddN"
+ input: "gradients/AddN_75"
+ input: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_76"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_76"
+ input: "bert/encoder/layer_0/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_76"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_76"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_77"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_77"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_77"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/output/add"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_0/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_78"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_78"
+ input: "bert/encoder/layer_0/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_78"
+ input: "bert/encoder/layer_0/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_0/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_0/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/intermediate/dense/mul_3"
+ input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_0/intermediate/dense/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul"
+ input: "bert/encoder/layer_0/intermediate/dense/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul_1"
+ input: "bert/encoder/layer_0/intermediate/dense/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/intermediate/dense/mul_2/x"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Tanh_grad/TanhGrad"
+ op: "TanhGrad"
+ input: "bert/encoder/layer_0/intermediate/dense/Tanh"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Tanh_grad/TanhGrad"
+ input: "bert/encoder/layer_0/intermediate/dense/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/intermediate/dense/mul_1/x"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Tanh_grad/TanhGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "bert/encoder/layer_0/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/intermediate/dense/mul/x"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_0/intermediate/dense/Pow/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/sub/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/intermediate/dense/Pow/y"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/sub/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Pow"
+ op: "Pow"
+ input: "bert/encoder/layer_0/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Greater/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Greater"
+ op: "Greater"
+ input: "bert/encoder/layer_0/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Greater/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like/Shape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Select"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Greater"
+ input: "bert/encoder/layer_0/intermediate/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Log"
+ op: "Log"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Select_1"
+ op: "Select"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Greater"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Log"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_2"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape_1"
+ input: "bert/encoder/layer_0/intermediate/dense/Pow"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_3"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_2"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Select_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_3"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_79"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Reshape"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/AddN_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/AddN_79"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1"
+ input: "gradients/AddN_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_80"
+ op: "AddN"
+ input: "gradients/AddN_78"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_80"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_80"
+ input: "bert/encoder/layer_0/attention/output/add"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_80"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_80"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_81"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_81"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/output/add"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_82"
+ op: "AddN"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_82"
+ input: "bert/encoder/layer_0/attention/output/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/AddN_82"
+ input: "bert/encoder/layer_0/attention/output/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_0/attention/output/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/output/dense/BiasAdd"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/layer_0/attention/self/Reshape_3"
+ input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_0/attention/self/transpose_3/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/transpose"
+ input: "bert/encoder/layer_0/attention/self/transpose_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "bert/encoder/layer_0/attention/self/dropout/mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1"
+ op: "StridedSlice"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_0/attention/self/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Reshape"
+ input: "bert/encoder/layer_0/attention/self/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_0/attention/self/transpose_2/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Reshape_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_1_grad/Mul"
+ input: "bert/encoder/layer_0/attention/self/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/self/Softmax"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/transpose"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Reshape"
+ input: "bert/encoder/layer_0/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum/reduction_indices"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum/reduction_indices"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/sub"
+ input: "bert/encoder/layer_0/attention/self/Softmax"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/Reshape_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape"
+ input: "bert/encoder/layer_0/attention/self/Mul/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/encoder/layer_0/attention/self/MatMul"
+ input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_0/attention/self/transpose_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul_1"
+ op: "BatchMatMulV2"
+ input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape"
+ input: "bert/encoder/layer_0/attention/self/transpose"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "adj_x"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "adj_y"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_0/attention/self/transpose/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/InvertPermutation"
+ op: "InvertPermutation"
+ input: "bert/encoder/layer_0/attention/self/transpose_1/perm"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/transpose"
+ op: "Transpose"
+ input: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/InvertPermutation"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tperm"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 12
+ }
+ dim {
+ size: 64
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/transpose"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/transpose"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ op: "BiasAddGrad"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/Reshape_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/encoder/Reshape_1"
+ input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/AddN_83"
+ op: "AddN"
+ input: "gradients/AddN_82"
+ input: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul"
+ input: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/encoder/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/AddN_83"
+ input: "gradients/bert/encoder/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/encoder/Reshape_1_grad/Reshape"
+ input: "bert/embeddings/dropout/Cast"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/encoder/Reshape_1_grad/Reshape"
+ input: "bert/embeddings/dropout/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Shape"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/embeddings/dropout/mul_1_grad/Mul"
+ input: "bert/embeddings/dropout/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Mul"
+ input: "gradients/bert/embeddings/dropout/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Sum"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/embeddings/LayerNorm/batchnorm/add_1"
+ input: "gradients/bert/embeddings/dropout/mul_1_grad/Mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Mul_1"
+ input: "gradients/bert/embeddings/dropout/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/dropout/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Sum_1"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Reshape"
+ input: "bert/embeddings/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ op: "Mul"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Reshape"
+ input: "bert/embeddings/add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/embeddings/dropout/mul_grad/Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Neg"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ input: "bert/embeddings/LayerNorm/batchnorm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ op: "Mul"
+ input: "bert/embeddings/LayerNorm/moments/mean"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul_1"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum_1"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_84"
+ op: "AddN"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 768
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul"
+ op: "Mul"
+ input: "gradients/AddN_84"
+ input: "bert/embeddings/LayerNorm/gamma/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul_1"
+ op: "Mul"
+ input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/AddN_84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul_1"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum_1"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ op: "RsqrtGrad"
+ input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum_1"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 3
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/add"
+ op: "Add"
+ input: "bert/embeddings/LayerNorm/moments/variance/reduction_indices"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/add"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range"
+ op: "Range"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/start"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape_1"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/mod"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/DynamicStitch"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Tile"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\001\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/scalar"
+ op: "Const"
+ input: "^gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Mul"
+ op: "Mul"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/scalar"
+ input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/sub"
+ op: "Sub"
+ input: "bert/embeddings/add_1"
+ input: "bert/embeddings/LayerNorm/moments/StopGradient"
+ input: "^gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ op: "Mul"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Mul"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/sub"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum"
+ op: "Sum"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum_1"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Neg"
+ op: "Neg"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 3
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/add"
+ op: "Add"
+ input: "bert/embeddings/LayerNorm/moments/mean/reduction_indices"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/mod"
+ op: "FloorMod"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/add"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/start"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/delta"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range"
+ op: "Range"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/start"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/delta"
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill/value"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill"
+ op: "Fill"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape_1"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill/value"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/DynamicStitch"
+ op: "DynamicStitch"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/mod"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum/y"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum"
+ op: "Maximum"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/DynamicStitch"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/floordiv"
+ op: "FloorDiv"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/DynamicStitch"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Tile"
+ op: "Tile"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/floordiv"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tmultiples"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 768.0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/truediv"
+ op: "RealDiv"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Tile"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/AddN_85"
+ op: "AddN"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul"
+ input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape"
+ input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/truediv"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/add_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: " \000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/add_1_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: "\001\000\000\000\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs"
+ op: "BroadcastGradientArgs"
+ input: "gradients/bert/embeddings/add_1_grad/Shape"
+ input: "gradients/bert/embeddings/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/add_1_grad/Sum"
+ op: "Sum"
+ input: "gradients/AddN_85"
+ input: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/add_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/add_1_grad/Sum"
+ input: "gradients/bert/embeddings/add_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 32
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/add_1_grad/Sum_1"
+ op: "Sum"
+ input: "gradients/AddN_85"
+ input: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/add_1_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/add_1_grad/Sum_1"
+ input: "gradients/bert/embeddings/add_1_grad/Shape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Reshape_4_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Reshape_4_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/add_1_grad/Reshape_1"
+ input: "gradients/bert/embeddings/Reshape_4_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Reshape_1_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Reshape_1_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/add_1_grad/Reshape"
+ input: "gradients/bert/embeddings/Reshape_1_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Reshape_3_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\020\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Reshape_3_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/add_1_grad/Reshape"
+ input: "gradients/bert/embeddings/Reshape_3_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/Rank"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 2
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\200\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/stack/1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/stack"
+ op: "Pack"
+ input: "gradients/bert/embeddings/Slice_grad/Rank"
+ input: "gradients/bert/embeddings/Slice_grad/stack/1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/Reshape"
+ op: "Reshape"
+ input: "bert/embeddings/Slice/begin"
+ input: "gradients/bert/embeddings/Slice_grad/stack"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/Shape_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\002\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/sub"
+ op: "Sub"
+ input: "gradients/bert/embeddings/Slice_grad/Shape_1"
+ input: "gradients/bert/embeddings/Slice_grad/Shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/sub_1"
+ op: "Sub"
+ input: "gradients/bert/embeddings/Slice_grad/sub"
+ input: "bert/embeddings/Slice/begin"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/Reshape_1"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/Slice_grad/sub_1"
+ input: "gradients/bert/embeddings/Slice_grad/stack"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/concat/axis"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/concat"
+ op: "ConcatV2"
+ input: "gradients/bert/embeddings/Slice_grad/Reshape"
+ input: "gradients/bert/embeddings/Slice_grad/Reshape_1"
+ input: "gradients/bert/embeddings/Slice_grad/concat/axis"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/Slice_grad/Pad"
+ op: "Pad"
+ input: "gradients/bert/embeddings/Reshape_4_grad/Reshape"
+ input: "gradients/bert/embeddings/Slice_grad/concat"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tpaddings"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/Shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT64
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\210R\000\000\000\000\000\000\000\003\000\000\000\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/Cast"
+ op: "Cast"
+ input: "gradients/bert/embeddings/GatherV2_grad/Shape"
+ attr {
+ key: "DstT"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "SrcT"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "Truncate"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/Size"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 4096
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/ExpandDims/dim"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/ExpandDims"
+ op: "ExpandDims"
+ input: "gradients/bert/embeddings/GatherV2_grad/Size"
+ input: "gradients/bert/embeddings/GatherV2_grad/ExpandDims/dim"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tdim"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/embeddings/GatherV2_grad/Cast"
+ input: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack"
+ input: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack_1"
+ input: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/concat/axis"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/concat"
+ op: "ConcatV2"
+ input: "gradients/bert/embeddings/GatherV2_grad/ExpandDims"
+ input: "gradients/bert/embeddings/GatherV2_grad/strided_slice"
+ input: "gradients/bert/embeddings/GatherV2_grad/concat/axis"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/Reshape"
+ op: "Reshape"
+ input: "gradients/bert/embeddings/Reshape_1_grad/Reshape"
+ input: "gradients/bert/embeddings/GatherV2_grad/concat"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/GatherV2_grad/Reshape_1"
+ op: "Reshape"
+ input: "bert/embeddings/Reshape"
+ input: "gradients/bert/embeddings/GatherV2_grad/ExpandDims"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/MatMul_grad/MatMul"
+ op: "MatMul"
+ input: "gradients/bert/embeddings/Reshape_3_grad/Reshape"
+ input: "bert/embeddings/token_type_embeddings/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "gradients/bert/embeddings/MatMul_grad/MatMul_1"
+ op: "MatMul"
+ input: "bert/embeddings/one_hot"
+ input: "gradients/bert/embeddings/Reshape_3_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "transpose_a"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "transpose_b"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss"
+ op: "L2Loss"
+ input: "gradients/bert/embeddings/GatherV2_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/GatherV2_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_1"
+ op: "L2Loss"
+ input: "gradients/bert/embeddings/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_2"
+ op: "L2Loss"
+ input: "gradients/bert/embeddings/Slice_grad/Pad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/Slice_grad/Pad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_3"
+ op: "L2Loss"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_4"
+ op: "L2Loss"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_5"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_6"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_7"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_8"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_9"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_10"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_11"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_12"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_13"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_14"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_15"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_16"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_17"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_18"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_19"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_20"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_21"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_22"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_23"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_24"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_25"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_26"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_27"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_28"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_29"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_30"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_31"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_32"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_33"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_34"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_35"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_36"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_37"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_38"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_39"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_40"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_41"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_42"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_43"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_44"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_45"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_46"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_47"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_48"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_49"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_50"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_51"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_52"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_53"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_54"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_55"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_56"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_57"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_58"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_59"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_60"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_61"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_62"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_63"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_64"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_65"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_66"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_67"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_68"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_69"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_70"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_71"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_72"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_73"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_74"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_75"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_76"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_77"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_78"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_79"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_80"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_81"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_82"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_83"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_84"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_85"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_86"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_87"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_88"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_89"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_90"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_91"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_92"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_93"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_94"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_95"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_96"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_97"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_98"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_99"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_100"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_101"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_102"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_103"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_104"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_105"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_106"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_107"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_108"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_109"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_110"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_111"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_112"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_113"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_114"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_115"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_116"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_117"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_118"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_119"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_120"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_121"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_122"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_123"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_124"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_125"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_126"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_127"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_128"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_129"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_130"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_131"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_132"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_133"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_134"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_135"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_136"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_137"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_138"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_139"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_140"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_141"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_142"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_143"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_144"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_145"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_146"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_147"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_148"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_149"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_150"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_151"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_152"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_153"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_154"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_155"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_156"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_157"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_158"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_159"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_160"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_161"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_162"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_163"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_164"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_165"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_166"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_167"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_168"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_169"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_170"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_171"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_172"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_173"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_174"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_175"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_176"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_177"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_178"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_179"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_180"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_181"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_182"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_183"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_184"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_185"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_186"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_187"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_188"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_189"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_190"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_191"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_192"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_193"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_194"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_195"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_196"
+ op: "L2Loss"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_197"
+ op: "L2Loss"
+ input: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_198"
+ op: "L2Loss"
+ input: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_199"
+ op: "L2Loss"
+ input: "gradients/loss/MatMul_grad/MatMul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/L2Loss_200"
+ op: "L2Loss"
+ input: "gradients/loss/BiasAdd_grad/BiasAddGrad"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/stack"
+ op: "Pack"
+ input: "global_norm/L2Loss"
+ input: "global_norm/L2Loss_1"
+ input: "global_norm/L2Loss_2"
+ input: "global_norm/L2Loss_3"
+ input: "global_norm/L2Loss_4"
+ input: "global_norm/L2Loss_5"
+ input: "global_norm/L2Loss_6"
+ input: "global_norm/L2Loss_7"
+ input: "global_norm/L2Loss_8"
+ input: "global_norm/L2Loss_9"
+ input: "global_norm/L2Loss_10"
+ input: "global_norm/L2Loss_11"
+ input: "global_norm/L2Loss_12"
+ input: "global_norm/L2Loss_13"
+ input: "global_norm/L2Loss_14"
+ input: "global_norm/L2Loss_15"
+ input: "global_norm/L2Loss_16"
+ input: "global_norm/L2Loss_17"
+ input: "global_norm/L2Loss_18"
+ input: "global_norm/L2Loss_19"
+ input: "global_norm/L2Loss_20"
+ input: "global_norm/L2Loss_21"
+ input: "global_norm/L2Loss_22"
+ input: "global_norm/L2Loss_23"
+ input: "global_norm/L2Loss_24"
+ input: "global_norm/L2Loss_25"
+ input: "global_norm/L2Loss_26"
+ input: "global_norm/L2Loss_27"
+ input: "global_norm/L2Loss_28"
+ input: "global_norm/L2Loss_29"
+ input: "global_norm/L2Loss_30"
+ input: "global_norm/L2Loss_31"
+ input: "global_norm/L2Loss_32"
+ input: "global_norm/L2Loss_33"
+ input: "global_norm/L2Loss_34"
+ input: "global_norm/L2Loss_35"
+ input: "global_norm/L2Loss_36"
+ input: "global_norm/L2Loss_37"
+ input: "global_norm/L2Loss_38"
+ input: "global_norm/L2Loss_39"
+ input: "global_norm/L2Loss_40"
+ input: "global_norm/L2Loss_41"
+ input: "global_norm/L2Loss_42"
+ input: "global_norm/L2Loss_43"
+ input: "global_norm/L2Loss_44"
+ input: "global_norm/L2Loss_45"
+ input: "global_norm/L2Loss_46"
+ input: "global_norm/L2Loss_47"
+ input: "global_norm/L2Loss_48"
+ input: "global_norm/L2Loss_49"
+ input: "global_norm/L2Loss_50"
+ input: "global_norm/L2Loss_51"
+ input: "global_norm/L2Loss_52"
+ input: "global_norm/L2Loss_53"
+ input: "global_norm/L2Loss_54"
+ input: "global_norm/L2Loss_55"
+ input: "global_norm/L2Loss_56"
+ input: "global_norm/L2Loss_57"
+ input: "global_norm/L2Loss_58"
+ input: "global_norm/L2Loss_59"
+ input: "global_norm/L2Loss_60"
+ input: "global_norm/L2Loss_61"
+ input: "global_norm/L2Loss_62"
+ input: "global_norm/L2Loss_63"
+ input: "global_norm/L2Loss_64"
+ input: "global_norm/L2Loss_65"
+ input: "global_norm/L2Loss_66"
+ input: "global_norm/L2Loss_67"
+ input: "global_norm/L2Loss_68"
+ input: "global_norm/L2Loss_69"
+ input: "global_norm/L2Loss_70"
+ input: "global_norm/L2Loss_71"
+ input: "global_norm/L2Loss_72"
+ input: "global_norm/L2Loss_73"
+ input: "global_norm/L2Loss_74"
+ input: "global_norm/L2Loss_75"
+ input: "global_norm/L2Loss_76"
+ input: "global_norm/L2Loss_77"
+ input: "global_norm/L2Loss_78"
+ input: "global_norm/L2Loss_79"
+ input: "global_norm/L2Loss_80"
+ input: "global_norm/L2Loss_81"
+ input: "global_norm/L2Loss_82"
+ input: "global_norm/L2Loss_83"
+ input: "global_norm/L2Loss_84"
+ input: "global_norm/L2Loss_85"
+ input: "global_norm/L2Loss_86"
+ input: "global_norm/L2Loss_87"
+ input: "global_norm/L2Loss_88"
+ input: "global_norm/L2Loss_89"
+ input: "global_norm/L2Loss_90"
+ input: "global_norm/L2Loss_91"
+ input: "global_norm/L2Loss_92"
+ input: "global_norm/L2Loss_93"
+ input: "global_norm/L2Loss_94"
+ input: "global_norm/L2Loss_95"
+ input: "global_norm/L2Loss_96"
+ input: "global_norm/L2Loss_97"
+ input: "global_norm/L2Loss_98"
+ input: "global_norm/L2Loss_99"
+ input: "global_norm/L2Loss_100"
+ input: "global_norm/L2Loss_101"
+ input: "global_norm/L2Loss_102"
+ input: "global_norm/L2Loss_103"
+ input: "global_norm/L2Loss_104"
+ input: "global_norm/L2Loss_105"
+ input: "global_norm/L2Loss_106"
+ input: "global_norm/L2Loss_107"
+ input: "global_norm/L2Loss_108"
+ input: "global_norm/L2Loss_109"
+ input: "global_norm/L2Loss_110"
+ input: "global_norm/L2Loss_111"
+ input: "global_norm/L2Loss_112"
+ input: "global_norm/L2Loss_113"
+ input: "global_norm/L2Loss_114"
+ input: "global_norm/L2Loss_115"
+ input: "global_norm/L2Loss_116"
+ input: "global_norm/L2Loss_117"
+ input: "global_norm/L2Loss_118"
+ input: "global_norm/L2Loss_119"
+ input: "global_norm/L2Loss_120"
+ input: "global_norm/L2Loss_121"
+ input: "global_norm/L2Loss_122"
+ input: "global_norm/L2Loss_123"
+ input: "global_norm/L2Loss_124"
+ input: "global_norm/L2Loss_125"
+ input: "global_norm/L2Loss_126"
+ input: "global_norm/L2Loss_127"
+ input: "global_norm/L2Loss_128"
+ input: "global_norm/L2Loss_129"
+ input: "global_norm/L2Loss_130"
+ input: "global_norm/L2Loss_131"
+ input: "global_norm/L2Loss_132"
+ input: "global_norm/L2Loss_133"
+ input: "global_norm/L2Loss_134"
+ input: "global_norm/L2Loss_135"
+ input: "global_norm/L2Loss_136"
+ input: "global_norm/L2Loss_137"
+ input: "global_norm/L2Loss_138"
+ input: "global_norm/L2Loss_139"
+ input: "global_norm/L2Loss_140"
+ input: "global_norm/L2Loss_141"
+ input: "global_norm/L2Loss_142"
+ input: "global_norm/L2Loss_143"
+ input: "global_norm/L2Loss_144"
+ input: "global_norm/L2Loss_145"
+ input: "global_norm/L2Loss_146"
+ input: "global_norm/L2Loss_147"
+ input: "global_norm/L2Loss_148"
+ input: "global_norm/L2Loss_149"
+ input: "global_norm/L2Loss_150"
+ input: "global_norm/L2Loss_151"
+ input: "global_norm/L2Loss_152"
+ input: "global_norm/L2Loss_153"
+ input: "global_norm/L2Loss_154"
+ input: "global_norm/L2Loss_155"
+ input: "global_norm/L2Loss_156"
+ input: "global_norm/L2Loss_157"
+ input: "global_norm/L2Loss_158"
+ input: "global_norm/L2Loss_159"
+ input: "global_norm/L2Loss_160"
+ input: "global_norm/L2Loss_161"
+ input: "global_norm/L2Loss_162"
+ input: "global_norm/L2Loss_163"
+ input: "global_norm/L2Loss_164"
+ input: "global_norm/L2Loss_165"
+ input: "global_norm/L2Loss_166"
+ input: "global_norm/L2Loss_167"
+ input: "global_norm/L2Loss_168"
+ input: "global_norm/L2Loss_169"
+ input: "global_norm/L2Loss_170"
+ input: "global_norm/L2Loss_171"
+ input: "global_norm/L2Loss_172"
+ input: "global_norm/L2Loss_173"
+ input: "global_norm/L2Loss_174"
+ input: "global_norm/L2Loss_175"
+ input: "global_norm/L2Loss_176"
+ input: "global_norm/L2Loss_177"
+ input: "global_norm/L2Loss_178"
+ input: "global_norm/L2Loss_179"
+ input: "global_norm/L2Loss_180"
+ input: "global_norm/L2Loss_181"
+ input: "global_norm/L2Loss_182"
+ input: "global_norm/L2Loss_183"
+ input: "global_norm/L2Loss_184"
+ input: "global_norm/L2Loss_185"
+ input: "global_norm/L2Loss_186"
+ input: "global_norm/L2Loss_187"
+ input: "global_norm/L2Loss_188"
+ input: "global_norm/L2Loss_189"
+ input: "global_norm/L2Loss_190"
+ input: "global_norm/L2Loss_191"
+ input: "global_norm/L2Loss_192"
+ input: "global_norm/L2Loss_193"
+ input: "global_norm/L2Loss_194"
+ input: "global_norm/L2Loss_195"
+ input: "global_norm/L2Loss_196"
+ input: "global_norm/L2Loss_197"
+ input: "global_norm/L2Loss_198"
+ input: "global_norm/L2Loss_199"
+ input: "global_norm/L2Loss_200"
+ attr {
+ key: "N"
+ value {
+ i: 201
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 201
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "global_norm/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/Sum"
+ op: "Sum"
+ input: "global_norm/stack"
+ input: "global_norm/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "global_norm/Const_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 2.0
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/mul"
+ op: "Mul"
+ input: "global_norm/Sum"
+ input: "global_norm/Const_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "global_norm/global_norm"
+ op: "Sqrt"
+ input: "global_norm/mul"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/truediv/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/truediv"
+ op: "RealDiv"
+ input: "clip_by_global_norm/truediv/x"
+ input: "global_norm/global_norm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/truediv_1/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/truediv_1"
+ op: "RealDiv"
+ input: "clip_by_global_norm/Const"
+ input: "clip_by_global_norm/truediv_1/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/Minimum"
+ op: "Minimum"
+ input: "clip_by_global_norm/truediv"
+ input: "clip_by_global_norm/truediv_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul"
+ op: "Mul"
+ input: "clip_by_global_norm/mul/x"
+ input: "clip_by_global_norm/Minimum"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/IsFinite"
+ op: "IsFinite"
+ input: "global_norm/global_norm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/Const_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: nan
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/Select"
+ op: "Select"
+ input: "clip_by_global_norm/IsFinite"
+ input: "clip_by_global_norm/mul"
+ input: "clip_by_global_norm/Const_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_1"
+ op: "Mul"
+ input: "gradients/bert/embeddings/GatherV2_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/GatherV2_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_0"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/GatherV2_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 4096
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_2"
+ op: "Mul"
+ input: "gradients/bert/embeddings/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_1"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_3"
+ op: "Mul"
+ input: "gradients/bert/embeddings/Slice_grad/Pad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/Slice_grad/Pad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_2"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/Slice_grad/Pad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_4"
+ op: "Mul"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_3"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_5"
+ op: "Mul"
+ input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_4"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_5"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_6"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_5"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_6"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_7"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_6"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_8"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_7"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_8"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_9"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_8"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_10"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_9"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_10"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_11"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_10"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_11"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_12"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_11"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_12"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_13"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_12"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_13"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_14"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_13"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_15"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_14"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_15"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_16"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_15"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_17"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_16"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_17"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_18"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_17"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_18"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_19"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_18"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_19"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_20"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_19"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_21"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_20"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_21"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_22"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_21"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_22"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_23"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_22"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_24"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_23"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_24"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_25"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_24"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_26"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_25"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_26"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_27"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_26"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_28"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_27"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_28"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_29"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_28"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_29"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_30"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_29"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_31"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_30"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_31"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_32"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_31"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_32"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_33"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_32"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_33"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_34"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_33"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_34"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_35"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_34"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_36"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_35"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_36"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_37"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_36"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_38"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_37"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_38"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_39"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_38"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_39"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_40"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_39"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_40"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_41"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_40"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_42"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_41"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_42"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_43"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_42"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_43"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_44"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_43"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_44"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_45"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_44"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_46"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_45"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_47"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_46"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_47"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_48"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_47"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_48"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_49"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_48"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_50"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_49"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_50"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_51"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_50"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_52"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_51"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_52"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_53"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_52"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_53"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_54"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_53"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_54"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_55"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_54"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_55"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_56"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_55"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_56"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_57"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_56"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_57"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_58"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_57"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_59"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_58"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_59"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_60"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_59"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_60"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_61"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_60"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_61"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_62"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_61"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_62"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_63"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_62"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_64"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_63"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_64"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_65"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_64"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_66"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_65"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_66"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_67"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_66"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_68"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_67"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_69"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_68"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_69"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_70"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_69"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_70"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_71"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_70"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_72"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_71"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_72"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_73"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_72"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_73"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_74"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_73"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_75"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_74"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_75"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_76"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_75"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_76"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_77"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_76"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_77"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_78"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_77"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_79"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_78"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_80"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_79"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_80"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_81"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_80"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_82"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_81"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_82"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_83"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_82"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_83"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_84"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_83"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_85"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_84"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_85"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_86"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_85"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_86"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_87"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_86"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_87"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_88"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_87"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_88"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_89"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_88"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_89"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_90"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_89"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_90"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_91"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_90"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_91"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_92"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_91"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_92"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_93"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_92"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_93"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_94"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_93"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_94"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_95"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_94"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_96"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_95"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_96"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_97"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_96"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_97"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_98"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_97"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_98"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_99"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_98"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_100"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_99"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_100"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_101"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_100"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_101"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_102"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_101"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_102"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_103"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_102"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_103"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_104"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_103"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_104"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_105"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_104"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_106"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_105"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_106"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_107"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_106"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_107"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_108"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_107"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_108"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_109"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_108"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_109"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_110"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_109"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_110"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_111"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_110"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_111"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_112"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_111"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_112"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_113"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_112"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_113"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_114"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_113"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_114"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_115"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_114"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_116"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_115"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_116"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_117"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_116"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_117"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_118"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_117"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_118"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_119"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_118"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_119"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_120"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_119"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_120"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_121"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_120"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_121"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_122"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_121"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_122"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_123"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_122"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_123"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_124"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_123"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_124"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_125"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_124"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_125"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_126"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_125"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_126"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_127"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_126"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_127"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_128"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_127"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_128"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_129"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_128"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_129"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_130"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_129"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_130"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_131"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_130"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_131"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_132"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_131"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_133"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_132"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_133"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_134"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_133"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_134"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_135"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_134"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_136"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_135"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_136"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_137"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_136"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_137"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_138"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_137"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_138"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_139"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_138"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_139"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_140"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_139"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_140"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_141"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_140"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_141"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_142"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_141"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_143"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_142"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_143"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_144"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_143"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_144"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_145"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_144"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_145"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_146"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_145"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_146"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_147"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_146"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_147"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_148"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_147"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_148"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_149"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_148"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_150"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_149"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_150"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_151"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_150"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_151"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_152"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_151"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_152"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_153"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_152"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_154"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_153"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_154"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_155"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_154"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_155"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_156"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_155"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_156"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_157"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_156"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_157"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_158"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_157"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_158"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_159"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_158"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_160"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_159"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_160"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_161"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_160"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_161"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_162"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_161"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_162"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_163"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_162"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_163"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_164"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_163"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_164"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_165"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_164"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_165"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_166"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_165"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_166"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_167"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_166"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_167"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_168"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_167"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_168"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_169"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_168"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_170"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_169"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_170"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_171"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_170"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_171"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_172"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_171"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_172"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_173"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_172"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_173"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_174"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_173"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_174"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_175"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_174"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_175"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_176"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_175"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_176"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_177"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_176"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_177"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_178"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_177"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_178"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_179"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_178"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_179"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_180"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_179"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_180"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_181"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_180"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_181"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_182"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_181"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_182"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_183"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_182"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_183"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_184"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_183"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_184"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_185"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_184"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_185"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_186"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_185"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_186"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_187"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_186"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_187"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_188"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_187"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_188"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_189"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_188"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_190"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_189"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_190"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_191"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_190"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_191"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_192"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_191"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_192"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_193"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_192"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_193"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_194"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_193"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_194"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_195"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_194"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_195"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_196"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_195"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_197"
+ op: "Mul"
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_196"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_197"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_198"
+ op: "Mul"
+ input: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_197"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_198"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_199"
+ op: "Mul"
+ input: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_198"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_199"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_200"
+ op: "Mul"
+ input: "gradients/loss/MatMul_grad/MatMul_1"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_199"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_200"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/MatMul_grad/MatMul_1"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/mul_201"
+ op: "Mul"
+ input: "gradients/loss/BiasAdd_grad/BiasAddGrad"
+ input: "clip_by_global_norm/Select"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "clip_by_global_norm/clip_by_global_norm/_200"
+ op: "Identity"
+ input: "clip_by_global_norm/mul_201"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@gradients/loss/BiasAdd_grad/BiasAddGrad"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
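+# NOTE (added annotation, hedged): the clip_by_global_norm/mul_* and
+# clip_by_global_norm/clip_by_global_norm/_* node pairs above multiply each
+# per-variable gradient by the shared scalar "clip_by_global_norm/Select",
+# which appears to be the global-norm clipping scale factor; the final pair
+# (mul_200/mul_201 and _199/_200) covers the task head's [3, 768] weight and
+# [3] bias gradients.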
+node {
+ name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\210R\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_m/Assign"
+ op: "Assign"
+ input: "bert/embeddings/word_embeddings/adam_m"
+ input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_m/read"
+ op: "Identity"
+ input: "bert/embeddings/word_embeddings/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\210R\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_v/Assign"
+ op: "Assign"
+ input: "bert/embeddings/word_embeddings/adam_v"
+ input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/word_embeddings/adam_v/read"
+ op: "Identity"
+ input: "bert/embeddings/word_embeddings/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_3/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_3"
+ op: "Mul"
+ input: "Mul_3/x"
+ input: "bert/embeddings/word_embeddings/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_4/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_4/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "Mul_4/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "Mul_4/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "Mul_4/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/embeddings/GatherV2_grad/Cast"
+ input: "Mul_4/strided_slice/stack"
+ input: "Mul_4/strided_slice/stack_1"
+ input: "Mul_4/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 1
+ }
+ }
+}
+node {
+ name: "Mul_4/y"
+ op: "UnsortedSegmentSum"
+ input: "clip_by_global_norm/clip_by_global_norm/_0"
+ input: "gradients/bert/embeddings/GatherV2_grad/Reshape_1"
+ input: "Mul_4/strided_slice"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tindices"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tnumsegments"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_4"
+ op: "Mul"
+ input: "Mul_4/x"
+ input: "Mul_4/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_1"
+ op: "Add"
+ input: "Mul_3"
+ input: "Mul_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_5/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_5"
+ op: "Mul"
+ input: "Mul_5/x"
+ input: "bert/embeddings/word_embeddings/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square/strided_slice/stack"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "Square/strided_slice/stack_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "Square/strided_slice/stack_2"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "Square/strided_slice"
+ op: "StridedSlice"
+ input: "gradients/bert/embeddings/GatherV2_grad/Cast"
+ input: "Square/strided_slice/stack"
+ input: "Square/strided_slice/stack_1"
+ input: "Square/strided_slice/stack_2"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 1
+ }
+ }
+}
+node {
+ name: "Square/x"
+ op: "UnsortedSegmentSum"
+ input: "clip_by_global_norm/clip_by_global_norm/_0"
+ input: "gradients/bert/embeddings/GatherV2_grad/Reshape_1"
+ input: "Square/strided_slice"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tindices"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tnumsegments"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square"
+ op: "Square"
+ input: "Square/x"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_6/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_6"
+ op: "Mul"
+ input: "Mul_6/x"
+ input: "Square"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_2"
+ op: "Add"
+ input: "Mul_5"
+ input: "Mul_6"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt"
+ op: "Sqrt"
+ input: "add_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_3/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_3"
+ op: "Add"
+ input: "Sqrt"
+ input: "add_3/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_1"
+ op: "RealDiv"
+ input: "add_1"
+ input: "add_3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_7/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_7"
+ op: "Mul"
+ input: "mul_7/x"
+ input: "bert/embeddings/word_embeddings/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_4"
+ op: "Add"
+ input: "truediv_1"
+ input: "mul_7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_8"
+ op: "Mul"
+ input: "add"
+ input: "add_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_1"
+ op: "Sub"
+ input: "bert/embeddings/word_embeddings/read"
+ input: "mul_8"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_199"
+ op: "Assign"
+ input: "bert/embeddings/word_embeddings"
+ input: "sub_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_200"
+ op: "Assign"
+ input: "bert/embeddings/word_embeddings/adam_m"
+ input: "add_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_201"
+ op: "Assign"
+ input: "bert/embeddings/word_embeddings/adam_v"
+ input: "add_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
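+# NOTE (added annotation, hedged): the nodes from "Mul_3" through "Assign_201"
+# above appear to unroll one Adam-with-weight-decay update step for
+# bert/embeddings/word_embeddings, using the constants visible in the graph
+# (g is the densified, globally clipped word-embedding gradient):
+#   m   <- 0.9 * m + 0.1 * g                  (Mul_3, Mul_4, add_1)
+#   v   <- 0.999 * v + 0.001 * g^2            (Mul_5, Square, Mul_6, add_2)
+#   upd <- m / (sqrt(v) + 1e-6) + 0.01 * w    (Sqrt, add_3, truediv_1, mul_7, add_4)
+#   w   <- w - lr * upd                       ("add" is presumably the learning rate; mul_8, sub_1)
+# Assign_199/200/201 write w, m, and v back; the same pattern repeats below for
+# each remaining trainable variable (token_type_embeddings, position_embeddings, ...).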
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\002\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_m/Assign"
+ op: "Assign"
+ input: "bert/embeddings/token_type_embeddings/adam_m"
+ input: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_m/read"
+ op: "Identity"
+ input: "bert/embeddings/token_type_embeddings/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\002\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_v/Assign"
+ op: "Assign"
+ input: "bert/embeddings/token_type_embeddings/adam_v"
+ input: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/token_type_embeddings/adam_v/read"
+ op: "Identity"
+ input: "bert/embeddings/token_type_embeddings/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_9/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_9"
+ op: "Mul"
+ input: "Mul_9/x"
+ input: "bert/embeddings/token_type_embeddings/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_10/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_10"
+ op: "Mul"
+ input: "Mul_10/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_5"
+ op: "Add"
+ input: "Mul_9"
+ input: "Mul_10"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_11/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_11"
+ op: "Mul"
+ input: "Mul_11/x"
+ input: "bert/embeddings/token_type_embeddings/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_1"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_12/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_12"
+ op: "Mul"
+ input: "Mul_12/x"
+ input: "Square_1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_6"
+ op: "Add"
+ input: "Mul_11"
+ input: "Mul_12"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_1"
+ op: "Sqrt"
+ input: "add_6"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_7/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_7"
+ op: "Add"
+ input: "Sqrt_1"
+ input: "add_7/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_2"
+ op: "RealDiv"
+ input: "add_5"
+ input: "add_7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_13/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_13"
+ op: "Mul"
+ input: "mul_13/x"
+ input: "bert/embeddings/token_type_embeddings/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_8"
+ op: "Add"
+ input: "truediv_2"
+ input: "mul_13"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_14"
+ op: "Mul"
+ input: "add"
+ input: "add_8"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_2"
+ op: "Sub"
+ input: "bert/embeddings/token_type_embeddings/read"
+ input: "mul_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_202"
+ op: "Assign"
+ input: "bert/embeddings/token_type_embeddings"
+ input: "sub_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_203"
+ op: "Assign"
+ input: "bert/embeddings/token_type_embeddings/adam_m"
+ input: "add_5"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_204"
+ op: "Assign"
+ input: "bert/embeddings/token_type_embeddings/adam_v"
+ input: "add_6"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\002\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_m/Assign"
+ op: "Assign"
+ input: "bert/embeddings/position_embeddings/adam_m"
+ input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_m/read"
+ op: "Identity"
+ input: "bert/embeddings/position_embeddings/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\002\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_v/Assign"
+ op: "Assign"
+ input: "bert/embeddings/position_embeddings/adam_v"
+ input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/position_embeddings/adam_v/read"
+ op: "Identity"
+ input: "bert/embeddings/position_embeddings/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_15/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_15"
+ op: "Mul"
+ input: "Mul_15/x"
+ input: "bert/embeddings/position_embeddings/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_16/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_16"
+ op: "Mul"
+ input: "Mul_16/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_9"
+ op: "Add"
+ input: "Mul_15"
+ input: "Mul_16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_17/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_17"
+ op: "Mul"
+ input: "Mul_17/x"
+ input: "bert/embeddings/position_embeddings/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_2"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_18/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_18"
+ op: "Mul"
+ input: "Mul_18/x"
+ input: "Square_2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_10"
+ op: "Add"
+ input: "Mul_17"
+ input: "Mul_18"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_2"
+ op: "Sqrt"
+ input: "add_10"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_11/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_11"
+ op: "Add"
+ input: "Sqrt_2"
+ input: "add_11/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_3"
+ op: "RealDiv"
+ input: "add_9"
+ input: "add_11"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_19/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_19"
+ op: "Mul"
+ input: "mul_19/x"
+ input: "bert/embeddings/position_embeddings/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_12"
+ op: "Add"
+ input: "truediv_3"
+ input: "mul_19"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_20"
+ op: "Mul"
+ input: "add"
+ input: "add_12"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_3"
+ op: "Sub"
+ input: "bert/embeddings/position_embeddings/read"
+ input: "mul_20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_205"
+ op: "Assign"
+ input: "bert/embeddings/position_embeddings"
+ input: "sub_3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_206"
+ op: "Assign"
+ input: "bert/embeddings/position_embeddings/adam_m"
+ input: "add_9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_207"
+ op: "Assign"
+ input: "bert/embeddings/position_embeddings/adam_v"
+ input: "add_10"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
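+# The same update pattern follows for bert/embeddings/LayerNorm/beta (768); note that no
+# 0.01 * param weight-decay term appears for this variable (nodes Mul_21 .. Assign_210).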
+node {
+ name: "bert/embeddings/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/beta/adam_m"
+ input: "bert/embeddings/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/embeddings/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/beta/adam_v"
+ input: "bert/embeddings/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/embeddings/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_21/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_21"
+ op: "Mul"
+ input: "Mul_21/x"
+ input: "bert/embeddings/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_22/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_22"
+ op: "Mul"
+ input: "Mul_22/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_13"
+ op: "Add"
+ input: "Mul_21"
+ input: "Mul_22"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_23/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_23"
+ op: "Mul"
+ input: "Mul_23/x"
+ input: "bert/embeddings/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_3"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_24/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_24"
+ op: "Mul"
+ input: "Mul_24/x"
+ input: "Square_3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_14"
+ op: "Add"
+ input: "Mul_23"
+ input: "Mul_24"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_3"
+ op: "Sqrt"
+ input: "add_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_15/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_15"
+ op: "Add"
+ input: "Sqrt_3"
+ input: "add_15/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_4"
+ op: "RealDiv"
+ input: "add_13"
+ input: "add_15"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_25"
+ op: "Mul"
+ input: "add"
+ input: "truediv_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_4"
+ op: "Sub"
+ input: "bert/embeddings/LayerNorm/beta/read"
+ input: "mul_25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_208"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/beta"
+ input: "sub_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_209"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/beta/adam_m"
+ input: "add_13"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_210"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/beta/adam_v"
+ input: "add_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
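+# Same pattern for bert/embeddings/LayerNorm/gamma (768), again without a weight-decay term
+# (nodes Mul_26 .. Assign_213).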
+node {
+ name: "bert/embeddings/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/gamma/adam_m"
+ input: "bert/embeddings/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/embeddings/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/gamma/adam_v"
+ input: "bert/embeddings/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/embeddings/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/embeddings/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_26/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_26"
+ op: "Mul"
+ input: "Mul_26/x"
+ input: "bert/embeddings/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_27/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_27"
+ op: "Mul"
+ input: "Mul_27/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_16"
+ op: "Add"
+ input: "Mul_26"
+ input: "Mul_27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_28/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_28"
+ op: "Mul"
+ input: "Mul_28/x"
+ input: "bert/embeddings/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_4"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_29/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_29"
+ op: "Mul"
+ input: "Mul_29/x"
+ input: "Square_4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_17"
+ op: "Add"
+ input: "Mul_28"
+ input: "Mul_29"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_4"
+ op: "Sqrt"
+ input: "add_17"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_18/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_18"
+ op: "Add"
+ input: "Sqrt_4"
+ input: "add_18/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_5"
+ op: "RealDiv"
+ input: "add_16"
+ input: "add_18"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_30"
+ op: "Mul"
+ input: "add"
+ input: "truediv_5"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_5"
+ op: "Sub"
+ input: "bert/embeddings/LayerNorm/gamma/read"
+ input: "mul_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_211"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/gamma"
+ input: "sub_5"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_212"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/gamma/adam_m"
+ input: "add_16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_213"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/gamma/adam_v"
+ input: "add_17"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
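+# Update for bert/encoder/layer_0/attention/self/query/kernel (768 x 768). Its m/v accumulators
+# are zero-initialized via Fill with shape tensor [768, 768]; the 0.01 * param weight-decay term
+# is included for this kernel (nodes Mul_31 .. Assign_216).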
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_31/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_31"
+ op: "Mul"
+ input: "Mul_31/x"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_32/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_32"
+ op: "Mul"
+ input: "Mul_32/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_5"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_19"
+ op: "Add"
+ input: "Mul_31"
+ input: "Mul_32"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_33/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_33"
+ op: "Mul"
+ input: "Mul_33/x"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_5"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_5"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_34/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_34"
+ op: "Mul"
+ input: "Mul_34/x"
+ input: "Square_5"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_20"
+ op: "Add"
+ input: "Mul_33"
+ input: "Mul_34"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_5"
+ op: "Sqrt"
+ input: "add_20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_21/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_21"
+ op: "Add"
+ input: "Sqrt_5"
+ input: "add_21/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_6"
+ op: "RealDiv"
+ input: "add_19"
+ input: "add_21"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_35/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_35"
+ op: "Mul"
+ input: "mul_35/x"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_22"
+ op: "Add"
+ input: "truediv_6"
+ input: "mul_35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_36"
+ op: "Mul"
+ input: "add"
+ input: "add_22"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_6"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/read"
+ input: "mul_36"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_214"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/kernel"
+ input: "sub_6"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_215"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ input: "add_19"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_216"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ input: "add_20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
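+# Update for bert/encoder/layer_0/attention/self/query/bias (768); no weight-decay term
+# (nodes Mul_37 .. Assign_219).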
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_37/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_37"
+ op: "Mul"
+ input: "Mul_37/x"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_38/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_38"
+ op: "Mul"
+ input: "Mul_38/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_6"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_23"
+ op: "Add"
+ input: "Mul_37"
+ input: "Mul_38"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_39/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_39"
+ op: "Mul"
+ input: "Mul_39/x"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_6"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_6"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_40/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_40"
+ op: "Mul"
+ input: "Mul_40/x"
+ input: "Square_6"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_24"
+ op: "Add"
+ input: "Mul_39"
+ input: "Mul_40"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_6"
+ op: "Sqrt"
+ input: "add_24"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_25/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_25"
+ op: "Add"
+ input: "Sqrt_6"
+ input: "add_25/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_7"
+ op: "RealDiv"
+ input: "add_23"
+ input: "add_25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_41"
+ op: "Mul"
+ input: "add"
+ input: "truediv_7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_7"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/self/query/bias/read"
+ input: "mul_41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_217"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/bias"
+ input: "sub_7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_218"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ input: "add_23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_219"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ input: "add_24"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
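+# The corresponding update for bert/encoder/layer_0/attention/self/key/kernel (768 x 768)
+# begins here (nodes Mul_42 onward).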
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_42/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_42"
+ op: "Mul"
+ input: "Mul_42/x"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_43/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_43"
+ op: "Mul"
+ input: "Mul_43/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_26"
+ op: "Add"
+ input: "Mul_42"
+ input: "Mul_43"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_44/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_44"
+ op: "Mul"
+ input: "Mul_44/x"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_7"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_45/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_45"
+ op: "Mul"
+ input: "Mul_45/x"
+ input: "Square_7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_27"
+ op: "Add"
+ input: "Mul_44"
+ input: "Mul_45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_7"
+ op: "Sqrt"
+ input: "add_27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_28/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_28"
+ op: "Add"
+ input: "Sqrt_7"
+ input: "add_28/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_8"
+ op: "RealDiv"
+ input: "add_26"
+ input: "add_28"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_46/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_46"
+ op: "Mul"
+ input: "mul_46/x"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_29"
+ op: "Add"
+ input: "truediv_8"
+ input: "mul_46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_47"
+ op: "Mul"
+ input: "add"
+ input: "add_29"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_8"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/read"
+ input: "mul_47"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_220"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/kernel"
+ input: "sub_8"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_221"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ input: "add_26"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_222"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ input: "add_27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
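+# The same slot-creation and update pattern repeats below for the key bias
+# (shape [768]), using clip_by_global_norm/clip_by_global_norm/_8 as the
+# clipped gradient and ending in Assign_223 through Assign_225.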
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_48/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_48"
+ op: "Mul"
+ input: "Mul_48/x"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_49/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_49"
+ op: "Mul"
+ input: "Mul_49/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_8"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_30"
+ op: "Add"
+ input: "Mul_48"
+ input: "Mul_49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_50/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_50"
+ op: "Mul"
+ input: "Mul_50/x"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_8"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_8"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_51/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_51"
+ op: "Mul"
+ input: "Mul_51/x"
+ input: "Square_8"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_31"
+ op: "Add"
+ input: "Mul_50"
+ input: "Mul_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_8"
+ op: "Sqrt"
+ input: "add_31"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_32/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_32"
+ op: "Add"
+ input: "Sqrt_8"
+ input: "add_32/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_9"
+ op: "RealDiv"
+ input: "add_30"
+ input: "add_32"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_52"
+ op: "Mul"
+ input: "add"
+ input: "truediv_9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_9"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/self/key/bias/read"
+ input: "mul_52"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_223"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/bias"
+ input: "sub_9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_224"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ input: "add_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_225"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ input: "add_31"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
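+# Next: the same adam_m / adam_v slots and update ops for the value kernel
+# ([768, 768]), driven by clip_by_global_norm/clip_by_global_norm/_9 and
+# written back via Assign_226 through Assign_228.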
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_53/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_53"
+ op: "Mul"
+ input: "Mul_53/x"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_54/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_54"
+ op: "Mul"
+ input: "Mul_54/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_33"
+ op: "Add"
+ input: "Mul_53"
+ input: "Mul_54"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_55/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_55"
+ op: "Mul"
+ input: "Mul_55/x"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_9"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_56/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_56"
+ op: "Mul"
+ input: "Mul_56/x"
+ input: "Square_9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_34"
+ op: "Add"
+ input: "Mul_55"
+ input: "Mul_56"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_9"
+ op: "Sqrt"
+ input: "add_34"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_35/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_35"
+ op: "Add"
+ input: "Sqrt_9"
+ input: "add_35/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_10"
+ op: "RealDiv"
+ input: "add_33"
+ input: "add_35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_57/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_57"
+ op: "Mul"
+ input: "mul_57/x"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_36"
+ op: "Add"
+ input: "truediv_10"
+ input: "mul_57"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_58"
+ op: "Mul"
+ input: "add"
+ input: "add_36"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_10"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/read"
+ input: "mul_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_226"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/kernel"
+ input: "sub_10"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_227"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ input: "add_33"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_228"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ input: "add_34"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
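+# Same pattern for the value bias ([768]): gradient
+# clip_by_global_norm/clip_by_global_norm/_10, Assign_229 through Assign_231.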
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_59/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_59"
+ op: "Mul"
+ input: "Mul_59/x"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_60/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_60"
+ op: "Mul"
+ input: "Mul_60/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_10"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_37"
+ op: "Add"
+ input: "Mul_59"
+ input: "Mul_60"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_61/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_61"
+ op: "Mul"
+ input: "Mul_61/x"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_10"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_10"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_62/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_62"
+ op: "Mul"
+ input: "Mul_62/x"
+ input: "Square_10"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_38"
+ op: "Add"
+ input: "Mul_61"
+ input: "Mul_62"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_10"
+ op: "Sqrt"
+ input: "add_38"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_39/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_39"
+ op: "Add"
+ input: "Sqrt_10"
+ input: "add_39/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_11"
+ op: "RealDiv"
+ input: "add_37"
+ input: "add_39"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_63"
+ op: "Mul"
+ input: "add"
+ input: "truediv_11"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_11"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/self/value/bias/read"
+ input: "mul_63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_229"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/bias"
+ input: "sub_11"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_230"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ input: "add_37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_231"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ input: "add_38"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
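+# Same pattern for the attention output dense kernel ([768, 768]): gradient
+# clip_by_global_norm/clip_by_global_norm/_11, Assign_232 through Assign_234.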
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_64/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_64"
+ op: "Mul"
+ input: "Mul_64/x"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_65/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_65"
+ op: "Mul"
+ input: "Mul_65/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_11"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_40"
+ op: "Add"
+ input: "Mul_64"
+ input: "Mul_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_66/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_66"
+ op: "Mul"
+ input: "Mul_66/x"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_11"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_11"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_67/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_67"
+ op: "Mul"
+ input: "Mul_67/x"
+ input: "Square_11"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_41"
+ op: "Add"
+ input: "Mul_66"
+ input: "Mul_67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_11"
+ op: "Sqrt"
+ input: "add_41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_42/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_42"
+ op: "Add"
+ input: "Sqrt_11"
+ input: "add_42/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_12"
+ op: "RealDiv"
+ input: "add_40"
+ input: "add_42"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_68/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_68"
+ op: "Mul"
+ input: "mul_68/x"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_43"
+ op: "Add"
+ input: "truediv_12"
+ input: "mul_68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_69"
+ op: "Mul"
+ input: "add"
+ input: "add_43"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_12"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/read"
+ input: "mul_69"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_232"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel"
+ input: "sub_12"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_233"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ input: "add_40"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_234"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ input: "add_41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
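+# The nodes below create the adam_m / adam_v slot variables and the matching update for
+# bert/encoder/layer_0/attention/output/dense/bias (shape [768]):
+# m_new = 0.9*m + 0.1*grad, v_new = 0.999*v + 0.001*grad^2, then m_new / (sqrt(v_new) + 1e-6)
+# scaled by the learning-rate input "add". Unlike the kernel update above there is no
+# 0.01 weight-decay term, consistent with bias parameters being excluded from weight decay.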
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_70/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_70"
+ op: "Mul"
+ input: "Mul_70/x"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_71/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_71"
+ op: "Mul"
+ input: "Mul_71/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_12"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_44"
+ op: "Add"
+ input: "Mul_70"
+ input: "Mul_71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_72/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_72"
+ op: "Mul"
+ input: "Mul_72/x"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_12"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_12"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_73/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_73"
+ op: "Mul"
+ input: "Mul_73/x"
+ input: "Square_12"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_45"
+ op: "Add"
+ input: "Mul_72"
+ input: "Mul_73"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_12"
+ op: "Sqrt"
+ input: "add_45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_46/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_46"
+ op: "Add"
+ input: "Sqrt_12"
+ input: "add_46/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_13"
+ op: "RealDiv"
+ input: "add_44"
+ input: "add_46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_74"
+ op: "Mul"
+ input: "add"
+ input: "truediv_13"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_13"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/read"
+ input: "mul_74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_235"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/bias"
+ input: "sub_13"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_236"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ input: "add_44"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_237"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ input: "add_45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
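+# Same Adam slot/update pattern, now for bert/encoder/layer_0/attention/output/LayerNorm/beta
+# (shape [768]); as with the bias above, no weight-decay term is added for LayerNorm parameters.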
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_75/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_75"
+ op: "Mul"
+ input: "Mul_75/x"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_76/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_76"
+ op: "Mul"
+ input: "Mul_76/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_13"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_47"
+ op: "Add"
+ input: "Mul_75"
+ input: "Mul_76"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_77/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_77"
+ op: "Mul"
+ input: "Mul_77/x"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_13"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_13"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_78/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_78"
+ op: "Mul"
+ input: "Mul_78/x"
+ input: "Square_13"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_48"
+ op: "Add"
+ input: "Mul_77"
+ input: "Mul_78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_13"
+ op: "Sqrt"
+ input: "add_48"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_49/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_49"
+ op: "Add"
+ input: "Sqrt_13"
+ input: "add_49/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_14"
+ op: "RealDiv"
+ input: "add_47"
+ input: "add_49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_79"
+ op: "Mul"
+ input: "add"
+ input: "truediv_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_14"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/read"
+ input: "mul_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_238"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ input: "sub_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_239"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ input: "add_47"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_240"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ input: "add_48"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
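+# Same pattern again for bert/encoder/layer_0/attention/output/LayerNorm/gamma (shape [768]),
+# also without a weight-decay term.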
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_80/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_80"
+ op: "Mul"
+ input: "Mul_80/x"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_81/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_81"
+ op: "Mul"
+ input: "Mul_81/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_50"
+ op: "Add"
+ input: "Mul_80"
+ input: "Mul_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_82/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_82"
+ op: "Mul"
+ input: "Mul_82/x"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_14"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_83/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_83"
+ op: "Mul"
+ input: "Mul_83/x"
+ input: "Square_14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_51"
+ op: "Add"
+ input: "Mul_82"
+ input: "Mul_83"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_14"
+ op: "Sqrt"
+ input: "add_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_52/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_52"
+ op: "Add"
+ input: "Sqrt_14"
+ input: "add_52/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_15"
+ op: "RealDiv"
+ input: "add_50"
+ input: "add_52"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_84"
+ op: "Mul"
+ input: "add"
+ input: "truediv_15"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_15"
+ op: "Sub"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read"
+ input: "mul_84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_241"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ input: "sub_15"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_242"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_50"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_243"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
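+# Below: Adam slots and update for bert/encoder/layer_0/intermediate/dense/kernel
+# (shape [768, 3072]). The zeros initializer switches to a Fill op driven by an int32 shape
+# tensor ("\000\003\000\000\000\014\000\000" decodes to [768, 3072]); the update again
+# includes the 0.01 weight-decay term (mul_89), as expected for a kernel.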
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_85/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_85"
+ op: "Mul"
+ input: "Mul_85/x"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_86/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_86"
+ op: "Mul"
+ input: "Mul_86/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_15"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_53"
+ op: "Add"
+ input: "Mul_85"
+ input: "Mul_86"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_87/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_87"
+ op: "Mul"
+ input: "Mul_87/x"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_15"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_15"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_88/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_88"
+ op: "Mul"
+ input: "Mul_88/x"
+ input: "Square_15"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_54"
+ op: "Add"
+ input: "Mul_87"
+ input: "Mul_88"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_15"
+ op: "Sqrt"
+ input: "add_54"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_55/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_55"
+ op: "Add"
+ input: "Sqrt_15"
+ input: "add_55/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_16"
+ op: "RealDiv"
+ input: "add_53"
+ input: "add_55"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_89/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_89"
+ op: "Mul"
+ input: "mul_89/x"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_56"
+ op: "Add"
+ input: "truediv_16"
+ input: "mul_89"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_90"
+ op: "Mul"
+ input: "add"
+ input: "add_56"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_16"
+ op: "Sub"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/read"
+ input: "mul_90"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_244"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel"
+ input: "sub_16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_245"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ input: "add_53"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_246"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ input: "add_54"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
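+# Below: the same slots and update for bert/encoder/layer_0/intermediate/dense/bias
+# (shape [3072]); no weight-decay term for the bias.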
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_91/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_91"
+ op: "Mul"
+ input: "Mul_91/x"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_92/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_92"
+ op: "Mul"
+ input: "Mul_92/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_57"
+ op: "Add"
+ input: "Mul_91"
+ input: "Mul_92"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_93/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_93"
+ op: "Mul"
+ input: "Mul_93/x"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_16"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_94/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_94"
+ op: "Mul"
+ input: "Mul_94/x"
+ input: "Square_16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_58"
+ op: "Add"
+ input: "Mul_93"
+ input: "Mul_94"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_16"
+ op: "Sqrt"
+ input: "add_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_59/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_59"
+ op: "Add"
+ input: "Sqrt_16"
+ input: "add_59/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_17"
+ op: "RealDiv"
+ input: "add_57"
+ input: "add_59"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_95"
+ op: "Mul"
+ input: "add"
+ input: "truediv_17"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_17"
+ op: "Sub"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/read"
+ input: "mul_95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_247"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/bias"
+ input: "sub_17"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_248"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ input: "add_57"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_249"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ input: "add_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_96/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_96"
+ op: "Mul"
+ input: "Mul_96/x"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_97/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_97"
+ op: "Mul"
+ input: "Mul_97/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_17"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_60"
+ op: "Add"
+ input: "Mul_96"
+ input: "Mul_97"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_98/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_98"
+ op: "Mul"
+ input: "Mul_98/x"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_17"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_17"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_99/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_99"
+ op: "Mul"
+ input: "Mul_99/x"
+ input: "Square_17"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_61"
+ op: "Add"
+ input: "Mul_98"
+ input: "Mul_99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_17"
+ op: "Sqrt"
+ input: "add_61"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_62/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_62"
+ op: "Add"
+ input: "Sqrt_17"
+ input: "add_62/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_18"
+ op: "RealDiv"
+ input: "add_60"
+ input: "add_62"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_100/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_100"
+ op: "Mul"
+ input: "mul_100/x"
+ input: "bert/encoder/layer_0/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_63"
+ op: "Add"
+ input: "truediv_18"
+ input: "mul_100"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_101"
+ op: "Mul"
+ input: "add"
+ input: "add_63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_18"
+ op: "Sub"
+ input: "bert/encoder/layer_0/output/dense/kernel/read"
+ input: "mul_101"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_250"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/kernel"
+ input: "sub_18"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_251"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ input: "add_60"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_252"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ input: "add_61"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_102/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_102"
+ op: "Mul"
+ input: "Mul_102/x"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_103/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_103"
+ op: "Mul"
+ input: "Mul_103/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_18"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_64"
+ op: "Add"
+ input: "Mul_102"
+ input: "Mul_103"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_104/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_104"
+ op: "Mul"
+ input: "Mul_104/x"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_18"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_18"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_105/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_105"
+ op: "Mul"
+ input: "Mul_105/x"
+ input: "Square_18"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_65"
+ op: "Add"
+ input: "Mul_104"
+ input: "Mul_105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_18"
+ op: "Sqrt"
+ input: "add_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_66/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_66"
+ op: "Add"
+ input: "Sqrt_18"
+ input: "add_66/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_19"
+ op: "RealDiv"
+ input: "add_64"
+ input: "add_66"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_106"
+ op: "Mul"
+ input: "add"
+ input: "truediv_19"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_19"
+ op: "Sub"
+ input: "bert/encoder/layer_0/output/dense/bias/read"
+ input: "mul_106"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_253"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/bias"
+ input: "sub_19"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_254"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ input: "add_64"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_255"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ input: "add_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_107/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_107"
+ op: "Mul"
+ input: "Mul_107/x"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_108/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_108"
+ op: "Mul"
+ input: "Mul_108/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_19"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_67"
+ op: "Add"
+ input: "Mul_107"
+ input: "Mul_108"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_109/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_109"
+ op: "Mul"
+ input: "Mul_109/x"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_19"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_19"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_110/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_110"
+ op: "Mul"
+ input: "Mul_110/x"
+ input: "Square_19"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_68"
+ op: "Add"
+ input: "Mul_109"
+ input: "Mul_110"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_19"
+ op: "Sqrt"
+ input: "add_68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_69/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_69"
+ op: "Add"
+ input: "Sqrt_19"
+ input: "add_69/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_20"
+ op: "RealDiv"
+ input: "add_67"
+ input: "add_69"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_111"
+ op: "Mul"
+ input: "add"
+ input: "truediv_20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_20"
+ op: "Sub"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/read"
+ input: "mul_111"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_256"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta"
+ input: "sub_20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_257"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ input: "add_67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_258"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ input: "add_68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_112/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_112"
+ op: "Mul"
+ input: "Mul_112/x"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_113/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_113"
+ op: "Mul"
+ input: "Mul_113/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_70"
+ op: "Add"
+ input: "Mul_112"
+ input: "Mul_113"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_114/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_114"
+ op: "Mul"
+ input: "Mul_114/x"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_20"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_115/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_115"
+ op: "Mul"
+ input: "Mul_115/x"
+ input: "Square_20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_71"
+ op: "Add"
+ input: "Mul_114"
+ input: "Mul_115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_20"
+ op: "Sqrt"
+ input: "add_71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_72/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_72"
+ op: "Add"
+ input: "Sqrt_20"
+ input: "add_72/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_21"
+ op: "RealDiv"
+ input: "add_70"
+ input: "add_72"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_116"
+ op: "Mul"
+ input: "add"
+ input: "truediv_21"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_21"
+ op: "Sub"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/read"
+ input: "mul_116"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_259"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ input: "sub_21"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_260"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ input: "add_70"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_261"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ input: "add_71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_117/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_117"
+ op: "Mul"
+ input: "Mul_117/x"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_118/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_118"
+ op: "Mul"
+ input: "Mul_118/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_21"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_73"
+ op: "Add"
+ input: "Mul_117"
+ input: "Mul_118"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_119/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_119"
+ op: "Mul"
+ input: "Mul_119/x"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_21"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_21"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_120/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_120"
+ op: "Mul"
+ input: "Mul_120/x"
+ input: "Square_21"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_74"
+ op: "Add"
+ input: "Mul_119"
+ input: "Mul_120"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_21"
+ op: "Sqrt"
+ input: "add_74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_75/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_75"
+ op: "Add"
+ input: "Sqrt_21"
+ input: "add_75/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_22"
+ op: "RealDiv"
+ input: "add_73"
+ input: "add_75"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_121/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_121"
+ op: "Mul"
+ input: "mul_121/x"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_76"
+ op: "Add"
+ input: "truediv_22"
+ input: "mul_121"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_122"
+ op: "Mul"
+ input: "add"
+ input: "add_76"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_22"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/read"
+ input: "mul_122"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_262"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/kernel"
+ input: "sub_22"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_263"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ input: "add_73"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_264"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ input: "add_74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_123/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_123"
+ op: "Mul"
+ input: "Mul_123/x"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_124/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_124"
+ op: "Mul"
+ input: "Mul_124/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_22"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_77"
+ op: "Add"
+ input: "Mul_123"
+ input: "Mul_124"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_125/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_125"
+ op: "Mul"
+ input: "Mul_125/x"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_22"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_22"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_126/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_126"
+ op: "Mul"
+ input: "Mul_126/x"
+ input: "Square_22"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_78"
+ op: "Add"
+ input: "Mul_125"
+ input: "Mul_126"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_22"
+ op: "Sqrt"
+ input: "add_78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_79/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_79"
+ op: "Add"
+ input: "Sqrt_22"
+ input: "add_79/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_23"
+ op: "RealDiv"
+ input: "add_77"
+ input: "add_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_127"
+ op: "Mul"
+ input: "add"
+ input: "truediv_23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_23"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/self/query/bias/read"
+ input: "mul_127"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_265"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/bias"
+ input: "sub_23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_266"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ input: "add_77"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_267"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ input: "add_78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_128/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_128"
+ op: "Mul"
+ input: "Mul_128/x"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_129/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_129"
+ op: "Mul"
+ input: "Mul_129/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_80"
+ op: "Add"
+ input: "Mul_128"
+ input: "Mul_129"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_130/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_130"
+ op: "Mul"
+ input: "Mul_130/x"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_23"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_131/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_131"
+ op: "Mul"
+ input: "Mul_131/x"
+ input: "Square_23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_81"
+ op: "Add"
+ input: "Mul_130"
+ input: "Mul_131"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_23"
+ op: "Sqrt"
+ input: "add_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_82/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_82"
+ op: "Add"
+ input: "Sqrt_23"
+ input: "add_82/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_24"
+ op: "RealDiv"
+ input: "add_80"
+ input: "add_82"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_132/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_132"
+ op: "Mul"
+ input: "mul_132/x"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_83"
+ op: "Add"
+ input: "truediv_24"
+ input: "mul_132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_133"
+ op: "Mul"
+ input: "add"
+ input: "add_83"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_24"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/read"
+ input: "mul_133"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_268"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/kernel"
+ input: "sub_24"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_269"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ input: "add_80"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_270"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ input: "add_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_134/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_134"
+ op: "Mul"
+ input: "Mul_134/x"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_135/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_135"
+ op: "Mul"
+ input: "Mul_135/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_24"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_84"
+ op: "Add"
+ input: "Mul_134"
+ input: "Mul_135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_136/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_136"
+ op: "Mul"
+ input: "Mul_136/x"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_24"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_24"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_137/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_137"
+ op: "Mul"
+ input: "Mul_137/x"
+ input: "Square_24"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_85"
+ op: "Add"
+ input: "Mul_136"
+ input: "Mul_137"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_24"
+ op: "Sqrt"
+ input: "add_85"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_86/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_86"
+ op: "Add"
+ input: "Sqrt_24"
+ input: "add_86/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_25"
+ op: "RealDiv"
+ input: "add_84"
+ input: "add_86"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_138"
+ op: "Mul"
+ input: "add"
+ input: "truediv_25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_25"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/self/key/bias/read"
+ input: "mul_138"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_271"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/bias"
+ input: "sub_25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_272"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ input: "add_84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_273"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ input: "add_85"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_139/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_139"
+ op: "Mul"
+ input: "Mul_139/x"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_140/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_140"
+ op: "Mul"
+ input: "Mul_140/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_87"
+ op: "Add"
+ input: "Mul_139"
+ input: "Mul_140"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_141/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_141"
+ op: "Mul"
+ input: "Mul_141/x"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_25"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_142/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_142"
+ op: "Mul"
+ input: "Mul_142/x"
+ input: "Square_25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_88"
+ op: "Add"
+ input: "Mul_141"
+ input: "Mul_142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_25"
+ op: "Sqrt"
+ input: "add_88"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_89/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_89"
+ op: "Add"
+ input: "Sqrt_25"
+ input: "add_89/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_26"
+ op: "RealDiv"
+ input: "add_87"
+ input: "add_89"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_143/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_143"
+ op: "Mul"
+ input: "mul_143/x"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_90"
+ op: "Add"
+ input: "truediv_26"
+ input: "mul_143"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_144"
+ op: "Mul"
+ input: "add"
+ input: "add_90"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_26"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/read"
+ input: "mul_144"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_274"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/kernel"
+ input: "sub_26"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_275"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ input: "add_87"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_276"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ input: "add_88"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_145/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_145"
+ op: "Mul"
+ input: "Mul_145/x"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_146/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_146"
+ op: "Mul"
+ input: "Mul_146/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_26"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_91"
+ op: "Add"
+ input: "Mul_145"
+ input: "Mul_146"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_147/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_147"
+ op: "Mul"
+ input: "Mul_147/x"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_26"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_26"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_148/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_148"
+ op: "Mul"
+ input: "Mul_148/x"
+ input: "Square_26"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_92"
+ op: "Add"
+ input: "Mul_147"
+ input: "Mul_148"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_26"
+ op: "Sqrt"
+ input: "add_92"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_93/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_93"
+ op: "Add"
+ input: "Sqrt_26"
+ input: "add_93/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_27"
+ op: "RealDiv"
+ input: "add_91"
+ input: "add_93"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_149"
+ op: "Mul"
+ input: "add"
+ input: "truediv_27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_27"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/self/value/bias/read"
+ input: "mul_149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_277"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/bias"
+ input: "sub_27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_278"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ input: "add_91"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_279"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ input: "add_92"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_150/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_150"
+ op: "Mul"
+ input: "Mul_150/x"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_151/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_151"
+ op: "Mul"
+ input: "Mul_151/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_94"
+ op: "Add"
+ input: "Mul_150"
+ input: "Mul_151"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_152/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_152"
+ op: "Mul"
+ input: "Mul_152/x"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_27"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_153/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_153"
+ op: "Mul"
+ input: "Mul_153/x"
+ input: "Square_27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_95"
+ op: "Add"
+ input: "Mul_152"
+ input: "Mul_153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_27"
+ op: "Sqrt"
+ input: "add_95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_96/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_96"
+ op: "Add"
+ input: "Sqrt_27"
+ input: "add_96/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_28"
+ op: "RealDiv"
+ input: "add_94"
+ input: "add_96"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_154/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_154"
+ op: "Mul"
+ input: "mul_154/x"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_97"
+ op: "Add"
+ input: "truediv_28"
+ input: "mul_154"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_155"
+ op: "Mul"
+ input: "add"
+ input: "add_97"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_28"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/read"
+ input: "mul_155"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_280"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel"
+ input: "sub_28"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_281"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ input: "add_94"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_282"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ input: "add_95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_156/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_156"
+ op: "Mul"
+ input: "Mul_156/x"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_157/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_157"
+ op: "Mul"
+ input: "Mul_157/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_28"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_98"
+ op: "Add"
+ input: "Mul_156"
+ input: "Mul_157"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_158/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_158"
+ op: "Mul"
+ input: "Mul_158/x"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_28"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_28"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_159/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_159"
+ op: "Mul"
+ input: "Mul_159/x"
+ input: "Square_28"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_99"
+ op: "Add"
+ input: "Mul_158"
+ input: "Mul_159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_28"
+ op: "Sqrt"
+ input: "add_99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_100/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_100"
+ op: "Add"
+ input: "Sqrt_28"
+ input: "add_100/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_29"
+ op: "RealDiv"
+ input: "add_98"
+ input: "add_100"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_160"
+ op: "Mul"
+ input: "add"
+ input: "truediv_29"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_29"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/read"
+ input: "mul_160"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_283"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/bias"
+ input: "sub_29"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_284"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ input: "add_98"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_285"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ input: "add_99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_161/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_161"
+ op: "Mul"
+ input: "Mul_161/x"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_162/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_162"
+ op: "Mul"
+ input: "Mul_162/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_29"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_101"
+ op: "Add"
+ input: "Mul_161"
+ input: "Mul_162"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_163/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_163"
+ op: "Mul"
+ input: "Mul_163/x"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_29"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_29"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_164/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_164"
+ op: "Mul"
+ input: "Mul_164/x"
+ input: "Square_29"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_102"
+ op: "Add"
+ input: "Mul_163"
+ input: "Mul_164"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_29"
+ op: "Sqrt"
+ input: "add_102"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_103/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_103"
+ op: "Add"
+ input: "Sqrt_29"
+ input: "add_103/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_30"
+ op: "RealDiv"
+ input: "add_101"
+ input: "add_103"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_165"
+ op: "Mul"
+ input: "add"
+ input: "truediv_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_30"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/read"
+ input: "mul_165"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_286"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ input: "sub_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_287"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ input: "add_101"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_288"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ input: "add_102"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_166/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_166"
+ op: "Mul"
+ input: "Mul_166/x"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_167/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_167"
+ op: "Mul"
+ input: "Mul_167/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_104"
+ op: "Add"
+ input: "Mul_166"
+ input: "Mul_167"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_168/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_168"
+ op: "Mul"
+ input: "Mul_168/x"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_30"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_169/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_169"
+ op: "Mul"
+ input: "Mul_169/x"
+ input: "Square_30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_105"
+ op: "Add"
+ input: "Mul_168"
+ input: "Mul_169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_30"
+ op: "Sqrt"
+ input: "add_105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_106/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_106"
+ op: "Add"
+ input: "Sqrt_30"
+ input: "add_106/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_31"
+ op: "RealDiv"
+ input: "add_104"
+ input: "add_106"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_170"
+ op: "Mul"
+ input: "add"
+ input: "truediv_31"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_31"
+ op: "Sub"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read"
+ input: "mul_170"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_289"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ input: "sub_31"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_290"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_104"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_291"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_171/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_171"
+ op: "Mul"
+ input: "Mul_171/x"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_172/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_172"
+ op: "Mul"
+ input: "Mul_172/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_31"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_107"
+ op: "Add"
+ input: "Mul_171"
+ input: "Mul_172"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_173/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_173"
+ op: "Mul"
+ input: "Mul_173/x"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_31"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_31"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_174/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_174"
+ op: "Mul"
+ input: "Mul_174/x"
+ input: "Square_31"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_108"
+ op: "Add"
+ input: "Mul_173"
+ input: "Mul_174"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_31"
+ op: "Sqrt"
+ input: "add_108"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_109/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_109"
+ op: "Add"
+ input: "Sqrt_31"
+ input: "add_109/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_32"
+ op: "RealDiv"
+ input: "add_107"
+ input: "add_109"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_175/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_175"
+ op: "Mul"
+ input: "mul_175/x"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_110"
+ op: "Add"
+ input: "truediv_32"
+ input: "mul_175"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_176"
+ op: "Mul"
+ input: "add"
+ input: "add_110"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_32"
+ op: "Sub"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/read"
+ input: "mul_176"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_292"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel"
+ input: "sub_32"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_293"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ input: "add_107"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_294"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ input: "add_108"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_177/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_177"
+ op: "Mul"
+ input: "Mul_177/x"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_178/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_178"
+ op: "Mul"
+ input: "Mul_178/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_32"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_111"
+ op: "Add"
+ input: "Mul_177"
+ input: "Mul_178"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_179/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_179"
+ op: "Mul"
+ input: "Mul_179/x"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_32"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_32"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_180/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_180"
+ op: "Mul"
+ input: "Mul_180/x"
+ input: "Square_32"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_112"
+ op: "Add"
+ input: "Mul_179"
+ input: "Mul_180"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_32"
+ op: "Sqrt"
+ input: "add_112"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_113/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_113"
+ op: "Add"
+ input: "Sqrt_32"
+ input: "add_113/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_33"
+ op: "RealDiv"
+ input: "add_111"
+ input: "add_113"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_181"
+ op: "Mul"
+ input: "add"
+ input: "truediv_33"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_33"
+ op: "Sub"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/read"
+ input: "mul_181"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_295"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/bias"
+ input: "sub_33"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_296"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ input: "add_111"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_297"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ input: "add_112"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_182/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_182"
+ op: "Mul"
+ input: "Mul_182/x"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_183/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_183"
+ op: "Mul"
+ input: "Mul_183/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_33"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_114"
+ op: "Add"
+ input: "Mul_182"
+ input: "Mul_183"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_184/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_184"
+ op: "Mul"
+ input: "Mul_184/x"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_33"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_33"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_185/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_185"
+ op: "Mul"
+ input: "Mul_185/x"
+ input: "Square_33"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_115"
+ op: "Add"
+ input: "Mul_184"
+ input: "Mul_185"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_33"
+ op: "Sqrt"
+ input: "add_115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_116/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_116"
+ op: "Add"
+ input: "Sqrt_33"
+ input: "add_116/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_34"
+ op: "RealDiv"
+ input: "add_114"
+ input: "add_116"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_186/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_186"
+ op: "Mul"
+ input: "mul_186/x"
+ input: "bert/encoder/layer_1/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_117"
+ op: "Add"
+ input: "truediv_34"
+ input: "mul_186"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_187"
+ op: "Mul"
+ input: "add"
+ input: "add_117"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_34"
+ op: "Sub"
+ input: "bert/encoder/layer_1/output/dense/kernel/read"
+ input: "mul_187"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_298"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/kernel"
+ input: "sub_34"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_299"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ input: "add_114"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_300"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ input: "add_115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_188/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_188"
+ op: "Mul"
+ input: "Mul_188/x"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_189/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_189"
+ op: "Mul"
+ input: "Mul_189/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_34"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_118"
+ op: "Add"
+ input: "Mul_188"
+ input: "Mul_189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_190/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_190"
+ op: "Mul"
+ input: "Mul_190/x"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_34"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_34"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_191/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_191"
+ op: "Mul"
+ input: "Mul_191/x"
+ input: "Square_34"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_119"
+ op: "Add"
+ input: "Mul_190"
+ input: "Mul_191"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_34"
+ op: "Sqrt"
+ input: "add_119"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_120/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_120"
+ op: "Add"
+ input: "Sqrt_34"
+ input: "add_120/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_35"
+ op: "RealDiv"
+ input: "add_118"
+ input: "add_120"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_192"
+ op: "Mul"
+ input: "add"
+ input: "truediv_35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_35"
+ op: "Sub"
+ input: "bert/encoder/layer_1/output/dense/bias/read"
+ input: "mul_192"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_301"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/bias"
+ input: "sub_35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_302"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ input: "add_118"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_303"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ input: "add_119"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_193/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_193"
+ op: "Mul"
+ input: "Mul_193/x"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_194/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_194"
+ op: "Mul"
+ input: "Mul_194/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_121"
+ op: "Add"
+ input: "Mul_193"
+ input: "Mul_194"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_195/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_195"
+ op: "Mul"
+ input: "Mul_195/x"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_35"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_196/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_196"
+ op: "Mul"
+ input: "Mul_196/x"
+ input: "Square_35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_122"
+ op: "Add"
+ input: "Mul_195"
+ input: "Mul_196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_35"
+ op: "Sqrt"
+ input: "add_122"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_123/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_123"
+ op: "Add"
+ input: "Sqrt_35"
+ input: "add_123/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_36"
+ op: "RealDiv"
+ input: "add_121"
+ input: "add_123"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_197"
+ op: "Mul"
+ input: "add"
+ input: "truediv_36"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_36"
+ op: "Sub"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/read"
+ input: "mul_197"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_304"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta"
+ input: "sub_36"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_305"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ input: "add_121"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_306"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ input: "add_122"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_198/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_198"
+ op: "Mul"
+ input: "Mul_198/x"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_199/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_199"
+ op: "Mul"
+ input: "Mul_199/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_36"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_124"
+ op: "Add"
+ input: "Mul_198"
+ input: "Mul_199"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_200/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_200"
+ op: "Mul"
+ input: "Mul_200/x"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_36"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_36"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_201/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_201"
+ op: "Mul"
+ input: "Mul_201/x"
+ input: "Square_36"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_125"
+ op: "Add"
+ input: "Mul_200"
+ input: "Mul_201"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_36"
+ op: "Sqrt"
+ input: "add_125"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_126/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_126"
+ op: "Add"
+ input: "Sqrt_36"
+ input: "add_126/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_37"
+ op: "RealDiv"
+ input: "add_124"
+ input: "add_126"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_202"
+ op: "Mul"
+ input: "add"
+ input: "truediv_37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_37"
+ op: "Sub"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/read"
+ input: "mul_202"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_307"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ input: "sub_37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_308"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ input: "add_124"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_309"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ input: "add_125"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_203/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_203"
+ op: "Mul"
+ input: "Mul_203/x"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_204/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_204"
+ op: "Mul"
+ input: "Mul_204/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_127"
+ op: "Add"
+ input: "Mul_203"
+ input: "Mul_204"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_205/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_205"
+ op: "Mul"
+ input: "Mul_205/x"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_37"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_206/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_206"
+ op: "Mul"
+ input: "Mul_206/x"
+ input: "Square_37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_128"
+ op: "Add"
+ input: "Mul_205"
+ input: "Mul_206"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_37"
+ op: "Sqrt"
+ input: "add_128"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_129/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_129"
+ op: "Add"
+ input: "Sqrt_37"
+ input: "add_129/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_38"
+ op: "RealDiv"
+ input: "add_127"
+ input: "add_129"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_207/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_207"
+ op: "Mul"
+ input: "mul_207/x"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_130"
+ op: "Add"
+ input: "truediv_38"
+ input: "mul_207"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_208"
+ op: "Mul"
+ input: "add"
+ input: "add_130"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_38"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/read"
+ input: "mul_208"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_310"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/kernel"
+ input: "sub_38"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_311"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ input: "add_127"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_312"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ input: "add_128"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_209/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_209"
+ op: "Mul"
+ input: "Mul_209/x"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_210/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_210"
+ op: "Mul"
+ input: "Mul_210/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_38"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_131"
+ op: "Add"
+ input: "Mul_209"
+ input: "Mul_210"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_211/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_211"
+ op: "Mul"
+ input: "Mul_211/x"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_38"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_38"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_212/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_212"
+ op: "Mul"
+ input: "Mul_212/x"
+ input: "Square_38"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_132"
+ op: "Add"
+ input: "Mul_211"
+ input: "Mul_212"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_38"
+ op: "Sqrt"
+ input: "add_132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_133/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_133"
+ op: "Add"
+ input: "Sqrt_38"
+ input: "add_133/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_39"
+ op: "RealDiv"
+ input: "add_131"
+ input: "add_133"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_213"
+ op: "Mul"
+ input: "add"
+ input: "truediv_39"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_39"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/self/query/bias/read"
+ input: "mul_213"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_313"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/bias"
+ input: "sub_39"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_314"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ input: "add_131"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_315"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ input: "add_132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_214/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_214"
+ op: "Mul"
+ input: "Mul_214/x"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_215/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_215"
+ op: "Mul"
+ input: "Mul_215/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_39"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_134"
+ op: "Add"
+ input: "Mul_214"
+ input: "Mul_215"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_216/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_216"
+ op: "Mul"
+ input: "Mul_216/x"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_39"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_39"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_217/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_217"
+ op: "Mul"
+ input: "Mul_217/x"
+ input: "Square_39"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_135"
+ op: "Add"
+ input: "Mul_216"
+ input: "Mul_217"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_39"
+ op: "Sqrt"
+ input: "add_135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_136/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_136"
+ op: "Add"
+ input: "Sqrt_39"
+ input: "add_136/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_40"
+ op: "RealDiv"
+ input: "add_134"
+ input: "add_136"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_218/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_218"
+ op: "Mul"
+ input: "mul_218/x"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_137"
+ op: "Add"
+ input: "truediv_40"
+ input: "mul_218"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_219"
+ op: "Mul"
+ input: "add"
+ input: "add_137"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_40"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/read"
+ input: "mul_219"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_316"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/kernel"
+ input: "sub_40"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_317"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ input: "add_134"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_318"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ input: "add_135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_220/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_220"
+ op: "Mul"
+ input: "Mul_220/x"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_221/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_221"
+ op: "Mul"
+ input: "Mul_221/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_40"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_138"
+ op: "Add"
+ input: "Mul_220"
+ input: "Mul_221"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_222/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_222"
+ op: "Mul"
+ input: "Mul_222/x"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_40"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_40"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_223/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_223"
+ op: "Mul"
+ input: "Mul_223/x"
+ input: "Square_40"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_139"
+ op: "Add"
+ input: "Mul_222"
+ input: "Mul_223"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_40"
+ op: "Sqrt"
+ input: "add_139"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_140/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_140"
+ op: "Add"
+ input: "Sqrt_40"
+ input: "add_140/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_41"
+ op: "RealDiv"
+ input: "add_138"
+ input: "add_140"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_224"
+ op: "Mul"
+ input: "add"
+ input: "truediv_41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_41"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/self/key/bias/read"
+ input: "mul_224"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_319"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/bias"
+ input: "sub_41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_320"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ input: "add_138"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_321"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ input: "add_139"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_225/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_225"
+ op: "Mul"
+ input: "Mul_225/x"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_226/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_226"
+ op: "Mul"
+ input: "Mul_226/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_141"
+ op: "Add"
+ input: "Mul_225"
+ input: "Mul_226"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_227/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_227"
+ op: "Mul"
+ input: "Mul_227/x"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_41"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_228/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_228"
+ op: "Mul"
+ input: "Mul_228/x"
+ input: "Square_41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_142"
+ op: "Add"
+ input: "Mul_227"
+ input: "Mul_228"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_41"
+ op: "Sqrt"
+ input: "add_142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_143/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_143"
+ op: "Add"
+ input: "Sqrt_41"
+ input: "add_143/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_42"
+ op: "RealDiv"
+ input: "add_141"
+ input: "add_143"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_229/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_229"
+ op: "Mul"
+ input: "mul_229/x"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_144"
+ op: "Add"
+ input: "truediv_42"
+ input: "mul_229"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_230"
+ op: "Mul"
+ input: "add"
+ input: "add_144"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_42"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/read"
+ input: "mul_230"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_322"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/kernel"
+ input: "sub_42"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_323"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ input: "add_141"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_324"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ input: "add_142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_231/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_231"
+ op: "Mul"
+ input: "Mul_231/x"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_232/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_232"
+ op: "Mul"
+ input: "Mul_232/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_42"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_145"
+ op: "Add"
+ input: "Mul_231"
+ input: "Mul_232"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_233/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_233"
+ op: "Mul"
+ input: "Mul_233/x"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_42"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_42"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_234/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_234"
+ op: "Mul"
+ input: "Mul_234/x"
+ input: "Square_42"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_146"
+ op: "Add"
+ input: "Mul_233"
+ input: "Mul_234"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_42"
+ op: "Sqrt"
+ input: "add_146"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_147/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_147"
+ op: "Add"
+ input: "Sqrt_42"
+ input: "add_147/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_43"
+ op: "RealDiv"
+ input: "add_145"
+ input: "add_147"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_235"
+ op: "Mul"
+ input: "add"
+ input: "truediv_43"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_43"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/self/value/bias/read"
+ input: "mul_235"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_325"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/bias"
+ input: "sub_43"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_326"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ input: "add_145"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_327"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ input: "add_146"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_236/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_236"
+ op: "Mul"
+ input: "Mul_236/x"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_237/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_237"
+ op: "Mul"
+ input: "Mul_237/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_43"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_148"
+ op: "Add"
+ input: "Mul_236"
+ input: "Mul_237"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_238/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_238"
+ op: "Mul"
+ input: "Mul_238/x"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_43"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_43"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_239/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_239"
+ op: "Mul"
+ input: "Mul_239/x"
+ input: "Square_43"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_149"
+ op: "Add"
+ input: "Mul_238"
+ input: "Mul_239"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_43"
+ op: "Sqrt"
+ input: "add_149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_150/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_150"
+ op: "Add"
+ input: "Sqrt_43"
+ input: "add_150/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_44"
+ op: "RealDiv"
+ input: "add_148"
+ input: "add_150"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_240/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_240"
+ op: "Mul"
+ input: "mul_240/x"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_151"
+ op: "Add"
+ input: "truediv_44"
+ input: "mul_240"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_241"
+ op: "Mul"
+ input: "add"
+ input: "add_151"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_44"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/read"
+ input: "mul_241"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_328"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel"
+ input: "sub_44"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_329"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ input: "add_148"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_330"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ input: "add_149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_242/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_242"
+ op: "Mul"
+ input: "Mul_242/x"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_243/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_243"
+ op: "Mul"
+ input: "Mul_243/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_44"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_152"
+ op: "Add"
+ input: "Mul_242"
+ input: "Mul_243"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_244/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_244"
+ op: "Mul"
+ input: "Mul_244/x"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_44"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_44"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_245/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_245"
+ op: "Mul"
+ input: "Mul_245/x"
+ input: "Square_44"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_153"
+ op: "Add"
+ input: "Mul_244"
+ input: "Mul_245"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_44"
+ op: "Sqrt"
+ input: "add_153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_154/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_154"
+ op: "Add"
+ input: "Sqrt_44"
+ input: "add_154/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_45"
+ op: "RealDiv"
+ input: "add_152"
+ input: "add_154"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_246"
+ op: "Mul"
+ input: "add"
+ input: "truediv_45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_45"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/read"
+ input: "mul_246"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_331"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/bias"
+ input: "sub_45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_332"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ input: "add_152"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_333"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ input: "add_153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_247/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_247"
+ op: "Mul"
+ input: "Mul_247/x"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_248/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_248"
+ op: "Mul"
+ input: "Mul_248/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_155"
+ op: "Add"
+ input: "Mul_247"
+ input: "Mul_248"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_249/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_249"
+ op: "Mul"
+ input: "Mul_249/x"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_45"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_250/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_250"
+ op: "Mul"
+ input: "Mul_250/x"
+ input: "Square_45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_156"
+ op: "Add"
+ input: "Mul_249"
+ input: "Mul_250"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_45"
+ op: "Sqrt"
+ input: "add_156"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_157/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_157"
+ op: "Add"
+ input: "Sqrt_45"
+ input: "add_157/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_46"
+ op: "RealDiv"
+ input: "add_155"
+ input: "add_157"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_251"
+ op: "Mul"
+ input: "add"
+ input: "truediv_46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_46"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/read"
+ input: "mul_251"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_334"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ input: "sub_46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_335"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ input: "add_155"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_336"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ input: "add_156"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_252/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_252"
+ op: "Mul"
+ input: "Mul_252/x"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_253/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_253"
+ op: "Mul"
+ input: "Mul_253/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_158"
+ op: "Add"
+ input: "Mul_252"
+ input: "Mul_253"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_254/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_254"
+ op: "Mul"
+ input: "Mul_254/x"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_46"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_255/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_255"
+ op: "Mul"
+ input: "Mul_255/x"
+ input: "Square_46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_159"
+ op: "Add"
+ input: "Mul_254"
+ input: "Mul_255"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_46"
+ op: "Sqrt"
+ input: "add_159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_160/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_160"
+ op: "Add"
+ input: "Sqrt_46"
+ input: "add_160/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_47"
+ op: "RealDiv"
+ input: "add_158"
+ input: "add_160"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_256"
+ op: "Mul"
+ input: "add"
+ input: "truediv_47"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_47"
+ op: "Sub"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read"
+ input: "mul_256"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_337"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ input: "sub_47"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_338"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_158"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_339"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_257/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_257"
+ op: "Mul"
+ input: "Mul_257/x"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_258/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_258"
+ op: "Mul"
+ input: "Mul_258/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_47"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_161"
+ op: "Add"
+ input: "Mul_257"
+ input: "Mul_258"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_259/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_259"
+ op: "Mul"
+ input: "Mul_259/x"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_47"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_47"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_260/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_260"
+ op: "Mul"
+ input: "Mul_260/x"
+ input: "Square_47"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_162"
+ op: "Add"
+ input: "Mul_259"
+ input: "Mul_260"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_47"
+ op: "Sqrt"
+ input: "add_162"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_163/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_163"
+ op: "Add"
+ input: "Sqrt_47"
+ input: "add_163/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_48"
+ op: "RealDiv"
+ input: "add_161"
+ input: "add_163"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_261/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_261"
+ op: "Mul"
+ input: "mul_261/x"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_164"
+ op: "Add"
+ input: "truediv_48"
+ input: "mul_261"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_262"
+ op: "Mul"
+ input: "add"
+ input: "add_164"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_48"
+ op: "Sub"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/read"
+ input: "mul_262"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_340"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel"
+ input: "sub_48"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_341"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ input: "add_161"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_342"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ input: "add_162"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_263/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_263"
+ op: "Mul"
+ input: "Mul_263/x"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_264/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_264"
+ op: "Mul"
+ input: "Mul_264/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_48"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_165"
+ op: "Add"
+ input: "Mul_263"
+ input: "Mul_264"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_265/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_265"
+ op: "Mul"
+ input: "Mul_265/x"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_48"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_48"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_266/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_266"
+ op: "Mul"
+ input: "Mul_266/x"
+ input: "Square_48"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_166"
+ op: "Add"
+ input: "Mul_265"
+ input: "Mul_266"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_48"
+ op: "Sqrt"
+ input: "add_166"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_167/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_167"
+ op: "Add"
+ input: "Sqrt_48"
+ input: "add_167/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_49"
+ op: "RealDiv"
+ input: "add_165"
+ input: "add_167"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_267"
+ op: "Mul"
+ input: "add"
+ input: "truediv_49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_49"
+ op: "Sub"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/read"
+ input: "mul_267"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_343"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/bias"
+ input: "sub_49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_344"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ input: "add_165"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_345"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ input: "add_166"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_268/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_268"
+ op: "Mul"
+ input: "Mul_268/x"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_269/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_269"
+ op: "Mul"
+ input: "Mul_269/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_168"
+ op: "Add"
+ input: "Mul_268"
+ input: "Mul_269"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_270/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_270"
+ op: "Mul"
+ input: "Mul_270/x"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_49"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_271/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_271"
+ op: "Mul"
+ input: "Mul_271/x"
+ input: "Square_49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_169"
+ op: "Add"
+ input: "Mul_270"
+ input: "Mul_271"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_49"
+ op: "Sqrt"
+ input: "add_169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_170/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_170"
+ op: "Add"
+ input: "Sqrt_49"
+ input: "add_170/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_50"
+ op: "RealDiv"
+ input: "add_168"
+ input: "add_170"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_272/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_272"
+ op: "Mul"
+ input: "mul_272/x"
+ input: "bert/encoder/layer_2/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_171"
+ op: "Add"
+ input: "truediv_50"
+ input: "mul_272"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_273"
+ op: "Mul"
+ input: "add"
+ input: "add_171"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_50"
+ op: "Sub"
+ input: "bert/encoder/layer_2/output/dense/kernel/read"
+ input: "mul_273"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_346"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/kernel"
+ input: "sub_50"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_347"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ input: "add_168"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_348"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ input: "add_169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_274/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_274"
+ op: "Mul"
+ input: "Mul_274/x"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_275/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_275"
+ op: "Mul"
+ input: "Mul_275/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_50"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_172"
+ op: "Add"
+ input: "Mul_274"
+ input: "Mul_275"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_276/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_276"
+ op: "Mul"
+ input: "Mul_276/x"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_50"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_50"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_277/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_277"
+ op: "Mul"
+ input: "Mul_277/x"
+ input: "Square_50"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_173"
+ op: "Add"
+ input: "Mul_276"
+ input: "Mul_277"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_50"
+ op: "Sqrt"
+ input: "add_173"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_174/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_174"
+ op: "Add"
+ input: "Sqrt_50"
+ input: "add_174/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_51"
+ op: "RealDiv"
+ input: "add_172"
+ input: "add_174"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_278"
+ op: "Mul"
+ input: "add"
+ input: "truediv_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_51"
+ op: "Sub"
+ input: "bert/encoder/layer_2/output/dense/bias/read"
+ input: "mul_278"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_349"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/bias"
+ input: "sub_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_350"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ input: "add_172"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_351"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ input: "add_173"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_279/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_279"
+ op: "Mul"
+ input: "Mul_279/x"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_280/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_280"
+ op: "Mul"
+ input: "Mul_280/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_175"
+ op: "Add"
+ input: "Mul_279"
+ input: "Mul_280"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_281/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_281"
+ op: "Mul"
+ input: "Mul_281/x"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_51"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_282/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_282"
+ op: "Mul"
+ input: "Mul_282/x"
+ input: "Square_51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_176"
+ op: "Add"
+ input: "Mul_281"
+ input: "Mul_282"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_51"
+ op: "Sqrt"
+ input: "add_176"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_177/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_177"
+ op: "Add"
+ input: "Sqrt_51"
+ input: "add_177/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_52"
+ op: "RealDiv"
+ input: "add_175"
+ input: "add_177"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_283"
+ op: "Mul"
+ input: "add"
+ input: "truediv_52"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_52"
+ op: "Sub"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/read"
+ input: "mul_283"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_352"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta"
+ input: "sub_52"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_353"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ input: "add_175"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_354"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ input: "add_176"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_284/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_284"
+ op: "Mul"
+ input: "Mul_284/x"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_285/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_285"
+ op: "Mul"
+ input: "Mul_285/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_52"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_178"
+ op: "Add"
+ input: "Mul_284"
+ input: "Mul_285"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_286/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_286"
+ op: "Mul"
+ input: "Mul_286/x"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_52"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_52"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_287/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_287"
+ op: "Mul"
+ input: "Mul_287/x"
+ input: "Square_52"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_179"
+ op: "Add"
+ input: "Mul_286"
+ input: "Mul_287"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_52"
+ op: "Sqrt"
+ input: "add_179"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_180/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_180"
+ op: "Add"
+ input: "Sqrt_52"
+ input: "add_180/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_53"
+ op: "RealDiv"
+ input: "add_178"
+ input: "add_180"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_288"
+ op: "Mul"
+ input: "add"
+ input: "truediv_53"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_53"
+ op: "Sub"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/read"
+ input: "mul_288"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_355"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ input: "sub_53"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_356"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ input: "add_178"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_357"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ input: "add_179"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_289/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_289"
+ op: "Mul"
+ input: "Mul_289/x"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_290/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_290"
+ op: "Mul"
+ input: "Mul_290/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_53"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_181"
+ op: "Add"
+ input: "Mul_289"
+ input: "Mul_290"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_291/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_291"
+ op: "Mul"
+ input: "Mul_291/x"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_53"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_53"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_292/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_292"
+ op: "Mul"
+ input: "Mul_292/x"
+ input: "Square_53"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_182"
+ op: "Add"
+ input: "Mul_291"
+ input: "Mul_292"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_53"
+ op: "Sqrt"
+ input: "add_182"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_183/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_183"
+ op: "Add"
+ input: "Sqrt_53"
+ input: "add_183/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_54"
+ op: "RealDiv"
+ input: "add_181"
+ input: "add_183"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_293/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_293"
+ op: "Mul"
+ input: "mul_293/x"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_184"
+ op: "Add"
+ input: "truediv_54"
+ input: "mul_293"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_294"
+ op: "Mul"
+ input: "add"
+ input: "add_184"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_54"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/read"
+ input: "mul_294"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_358"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/kernel"
+ input: "sub_54"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_359"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ input: "add_181"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_360"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ input: "add_182"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_295/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_295"
+ op: "Mul"
+ input: "Mul_295/x"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_296/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_296"
+ op: "Mul"
+ input: "Mul_296/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_54"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_185"
+ op: "Add"
+ input: "Mul_295"
+ input: "Mul_296"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_297/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_297"
+ op: "Mul"
+ input: "Mul_297/x"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_54"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_54"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_298/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_298"
+ op: "Mul"
+ input: "Mul_298/x"
+ input: "Square_54"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_186"
+ op: "Add"
+ input: "Mul_297"
+ input: "Mul_298"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_54"
+ op: "Sqrt"
+ input: "add_186"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_187/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_187"
+ op: "Add"
+ input: "Sqrt_54"
+ input: "add_187/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_55"
+ op: "RealDiv"
+ input: "add_185"
+ input: "add_187"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_299"
+ op: "Mul"
+ input: "add"
+ input: "truediv_55"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_55"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/self/query/bias/read"
+ input: "mul_299"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_361"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/bias"
+ input: "sub_55"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_362"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ input: "add_185"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_363"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ input: "add_186"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_300/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_300"
+ op: "Mul"
+ input: "Mul_300/x"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_301/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_301"
+ op: "Mul"
+ input: "Mul_301/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_55"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_188"
+ op: "Add"
+ input: "Mul_300"
+ input: "Mul_301"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_302/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_302"
+ op: "Mul"
+ input: "Mul_302/x"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_55"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_55"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_303/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_303"
+ op: "Mul"
+ input: "Mul_303/x"
+ input: "Square_55"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_189"
+ op: "Add"
+ input: "Mul_302"
+ input: "Mul_303"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_55"
+ op: "Sqrt"
+ input: "add_189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_190/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_190"
+ op: "Add"
+ input: "Sqrt_55"
+ input: "add_190/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_56"
+ op: "RealDiv"
+ input: "add_188"
+ input: "add_190"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_304/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_304"
+ op: "Mul"
+ input: "mul_304/x"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_191"
+ op: "Add"
+ input: "truediv_56"
+ input: "mul_304"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_305"
+ op: "Mul"
+ input: "add"
+ input: "add_191"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_56"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/read"
+ input: "mul_305"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_364"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/kernel"
+ input: "sub_56"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_365"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ input: "add_188"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_366"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ input: "add_189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_306/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_306"
+ op: "Mul"
+ input: "Mul_306/x"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_307/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_307"
+ op: "Mul"
+ input: "Mul_307/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_56"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_192"
+ op: "Add"
+ input: "Mul_306"
+ input: "Mul_307"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_308/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_308"
+ op: "Mul"
+ input: "Mul_308/x"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_56"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_56"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_309/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_309"
+ op: "Mul"
+ input: "Mul_309/x"
+ input: "Square_56"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_193"
+ op: "Add"
+ input: "Mul_308"
+ input: "Mul_309"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_56"
+ op: "Sqrt"
+ input: "add_193"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_194/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_194"
+ op: "Add"
+ input: "Sqrt_56"
+ input: "add_194/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_57"
+ op: "RealDiv"
+ input: "add_192"
+ input: "add_194"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_310"
+ op: "Mul"
+ input: "add"
+ input: "truediv_57"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_57"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/self/key/bias/read"
+ input: "mul_310"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_367"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/bias"
+ input: "sub_57"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_368"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ input: "add_192"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_369"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ input: "add_193"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_311/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_311"
+ op: "Mul"
+ input: "Mul_311/x"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_312/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_312"
+ op: "Mul"
+ input: "Mul_312/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_57"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_195"
+ op: "Add"
+ input: "Mul_311"
+ input: "Mul_312"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_313/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_313"
+ op: "Mul"
+ input: "Mul_313/x"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_57"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_57"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_314/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_314"
+ op: "Mul"
+ input: "Mul_314/x"
+ input: "Square_57"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_196"
+ op: "Add"
+ input: "Mul_313"
+ input: "Mul_314"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_57"
+ op: "Sqrt"
+ input: "add_196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_197/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_197"
+ op: "Add"
+ input: "Sqrt_57"
+ input: "add_197/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_58"
+ op: "RealDiv"
+ input: "add_195"
+ input: "add_197"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_315/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_315"
+ op: "Mul"
+ input: "mul_315/x"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_198"
+ op: "Add"
+ input: "truediv_58"
+ input: "mul_315"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_316"
+ op: "Mul"
+ input: "add"
+ input: "add_198"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_58"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/read"
+ input: "mul_316"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_370"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/kernel"
+ input: "sub_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_371"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ input: "add_195"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_372"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ input: "add_196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_317/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_317"
+ op: "Mul"
+ input: "Mul_317/x"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_318/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_318"
+ op: "Mul"
+ input: "Mul_318/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_199"
+ op: "Add"
+ input: "Mul_317"
+ input: "Mul_318"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_319/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_319"
+ op: "Mul"
+ input: "Mul_319/x"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_58"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_320/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_320"
+ op: "Mul"
+ input: "Mul_320/x"
+ input: "Square_58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_200"
+ op: "Add"
+ input: "Mul_319"
+ input: "Mul_320"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_58"
+ op: "Sqrt"
+ input: "add_200"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_201/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_201"
+ op: "Add"
+ input: "Sqrt_58"
+ input: "add_201/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_59"
+ op: "RealDiv"
+ input: "add_199"
+ input: "add_201"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_321"
+ op: "Mul"
+ input: "add"
+ input: "truediv_59"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_59"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/self/value/bias/read"
+ input: "mul_321"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_373"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/bias"
+ input: "sub_59"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_374"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ input: "add_199"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_375"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ input: "add_200"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_322/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_322"
+ op: "Mul"
+ input: "Mul_322/x"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_323/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_323"
+ op: "Mul"
+ input: "Mul_323/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_59"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_202"
+ op: "Add"
+ input: "Mul_322"
+ input: "Mul_323"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_324/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_324"
+ op: "Mul"
+ input: "Mul_324/x"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_59"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_59"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_325/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_325"
+ op: "Mul"
+ input: "Mul_325/x"
+ input: "Square_59"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_203"
+ op: "Add"
+ input: "Mul_324"
+ input: "Mul_325"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_59"
+ op: "Sqrt"
+ input: "add_203"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_204/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_204"
+ op: "Add"
+ input: "Sqrt_59"
+ input: "add_204/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_60"
+ op: "RealDiv"
+ input: "add_202"
+ input: "add_204"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_326/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_326"
+ op: "Mul"
+ input: "mul_326/x"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_205"
+ op: "Add"
+ input: "truediv_60"
+ input: "mul_326"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_327"
+ op: "Mul"
+ input: "add"
+ input: "add_205"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_60"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/read"
+ input: "mul_327"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_376"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel"
+ input: "sub_60"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_377"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ input: "add_202"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_378"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ input: "add_203"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_328/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_328"
+ op: "Mul"
+ input: "Mul_328/x"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_329/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_329"
+ op: "Mul"
+ input: "Mul_329/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_60"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_206"
+ op: "Add"
+ input: "Mul_328"
+ input: "Mul_329"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_330/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_330"
+ op: "Mul"
+ input: "Mul_330/x"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_60"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_60"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_331/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_331"
+ op: "Mul"
+ input: "Mul_331/x"
+ input: "Square_60"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_207"
+ op: "Add"
+ input: "Mul_330"
+ input: "Mul_331"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_60"
+ op: "Sqrt"
+ input: "add_207"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_208/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_208"
+ op: "Add"
+ input: "Sqrt_60"
+ input: "add_208/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_61"
+ op: "RealDiv"
+ input: "add_206"
+ input: "add_208"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_332"
+ op: "Mul"
+ input: "add"
+ input: "truediv_61"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_61"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/read"
+ input: "mul_332"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_379"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/bias"
+ input: "sub_61"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_380"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ input: "add_206"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_381"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ input: "add_207"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_333/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_333"
+ op: "Mul"
+ input: "Mul_333/x"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_334/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_334"
+ op: "Mul"
+ input: "Mul_334/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_61"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_209"
+ op: "Add"
+ input: "Mul_333"
+ input: "Mul_334"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_335/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_335"
+ op: "Mul"
+ input: "Mul_335/x"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_61"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_61"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_336/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_336"
+ op: "Mul"
+ input: "Mul_336/x"
+ input: "Square_61"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_210"
+ op: "Add"
+ input: "Mul_335"
+ input: "Mul_336"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_61"
+ op: "Sqrt"
+ input: "add_210"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_211/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_211"
+ op: "Add"
+ input: "Sqrt_61"
+ input: "add_211/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_62"
+ op: "RealDiv"
+ input: "add_209"
+ input: "add_211"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_337"
+ op: "Mul"
+ input: "add"
+ input: "truediv_62"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_62"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/read"
+ input: "mul_337"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_382"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ input: "sub_62"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_383"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ input: "add_209"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_384"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ input: "add_210"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_338/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_338"
+ op: "Mul"
+ input: "Mul_338/x"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_339/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_339"
+ op: "Mul"
+ input: "Mul_339/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_62"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_212"
+ op: "Add"
+ input: "Mul_338"
+ input: "Mul_339"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_340/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_340"
+ op: "Mul"
+ input: "Mul_340/x"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_62"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_62"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_341/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_341"
+ op: "Mul"
+ input: "Mul_341/x"
+ input: "Square_62"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_213"
+ op: "Add"
+ input: "Mul_340"
+ input: "Mul_341"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_62"
+ op: "Sqrt"
+ input: "add_213"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_214/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_214"
+ op: "Add"
+ input: "Sqrt_62"
+ input: "add_214/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_63"
+ op: "RealDiv"
+ input: "add_212"
+ input: "add_214"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_342"
+ op: "Mul"
+ input: "add"
+ input: "truediv_63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_63"
+ op: "Sub"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read"
+ input: "mul_342"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_385"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ input: "sub_63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_386"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_212"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_387"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_213"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_343/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_343"
+ op: "Mul"
+ input: "Mul_343/x"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_344/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_344"
+ op: "Mul"
+ input: "Mul_344/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_215"
+ op: "Add"
+ input: "Mul_343"
+ input: "Mul_344"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_345/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_345"
+ op: "Mul"
+ input: "Mul_345/x"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_63"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_346/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_346"
+ op: "Mul"
+ input: "Mul_346/x"
+ input: "Square_63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_216"
+ op: "Add"
+ input: "Mul_345"
+ input: "Mul_346"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_63"
+ op: "Sqrt"
+ input: "add_216"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_217/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_217"
+ op: "Add"
+ input: "Sqrt_63"
+ input: "add_217/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_64"
+ op: "RealDiv"
+ input: "add_215"
+ input: "add_217"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_347/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_347"
+ op: "Mul"
+ input: "mul_347/x"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_218"
+ op: "Add"
+ input: "truediv_64"
+ input: "mul_347"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_348"
+ op: "Mul"
+ input: "add"
+ input: "add_218"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_64"
+ op: "Sub"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/read"
+ input: "mul_348"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_388"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel"
+ input: "sub_64"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_389"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ input: "add_215"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_390"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ input: "add_216"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_349/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_349"
+ op: "Mul"
+ input: "Mul_349/x"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_350/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_350"
+ op: "Mul"
+ input: "Mul_350/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_64"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_219"
+ op: "Add"
+ input: "Mul_349"
+ input: "Mul_350"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_351/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_351"
+ op: "Mul"
+ input: "Mul_351/x"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_64"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_64"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_352/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_352"
+ op: "Mul"
+ input: "Mul_352/x"
+ input: "Square_64"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_220"
+ op: "Add"
+ input: "Mul_351"
+ input: "Mul_352"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_64"
+ op: "Sqrt"
+ input: "add_220"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_221/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_221"
+ op: "Add"
+ input: "Sqrt_64"
+ input: "add_221/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_65"
+ op: "RealDiv"
+ input: "add_219"
+ input: "add_221"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_353"
+ op: "Mul"
+ input: "add"
+ input: "truediv_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_65"
+ op: "Sub"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/read"
+ input: "mul_353"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_391"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/bias"
+ input: "sub_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_392"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ input: "add_219"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_393"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ input: "add_220"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_354/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_354"
+ op: "Mul"
+ input: "Mul_354/x"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_355/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_355"
+ op: "Mul"
+ input: "Mul_355/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_222"
+ op: "Add"
+ input: "Mul_354"
+ input: "Mul_355"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_356/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_356"
+ op: "Mul"
+ input: "Mul_356/x"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_65"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_357/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_357"
+ op: "Mul"
+ input: "Mul_357/x"
+ input: "Square_65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_223"
+ op: "Add"
+ input: "Mul_356"
+ input: "Mul_357"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_65"
+ op: "Sqrt"
+ input: "add_223"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_224/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_224"
+ op: "Add"
+ input: "Sqrt_65"
+ input: "add_224/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_66"
+ op: "RealDiv"
+ input: "add_222"
+ input: "add_224"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_358/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_358"
+ op: "Mul"
+ input: "mul_358/x"
+ input: "bert/encoder/layer_3/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_225"
+ op: "Add"
+ input: "truediv_66"
+ input: "mul_358"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_359"
+ op: "Mul"
+ input: "add"
+ input: "add_225"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_66"
+ op: "Sub"
+ input: "bert/encoder/layer_3/output/dense/kernel/read"
+ input: "mul_359"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_394"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/kernel"
+ input: "sub_66"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_395"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ input: "add_222"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_396"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ input: "add_223"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_360/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_360"
+ op: "Mul"
+ input: "Mul_360/x"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_361/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_361"
+ op: "Mul"
+ input: "Mul_361/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_66"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_226"
+ op: "Add"
+ input: "Mul_360"
+ input: "Mul_361"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_362/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_362"
+ op: "Mul"
+ input: "Mul_362/x"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_66"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_66"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_363/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_363"
+ op: "Mul"
+ input: "Mul_363/x"
+ input: "Square_66"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_227"
+ op: "Add"
+ input: "Mul_362"
+ input: "Mul_363"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_66"
+ op: "Sqrt"
+ input: "add_227"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_228/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_228"
+ op: "Add"
+ input: "Sqrt_66"
+ input: "add_228/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_67"
+ op: "RealDiv"
+ input: "add_226"
+ input: "add_228"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_364"
+ op: "Mul"
+ input: "add"
+ input: "truediv_67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_67"
+ op: "Sub"
+ input: "bert/encoder/layer_3/output/dense/bias/read"
+ input: "mul_364"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_397"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/bias"
+ input: "sub_67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_398"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ input: "add_226"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_399"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ input: "add_227"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_365/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_365"
+ op: "Mul"
+ input: "Mul_365/x"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_366/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_366"
+ op: "Mul"
+ input: "Mul_366/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_229"
+ op: "Add"
+ input: "Mul_365"
+ input: "Mul_366"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_367/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_367"
+ op: "Mul"
+ input: "Mul_367/x"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_67"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_368/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_368"
+ op: "Mul"
+ input: "Mul_368/x"
+ input: "Square_67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_230"
+ op: "Add"
+ input: "Mul_367"
+ input: "Mul_368"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_67"
+ op: "Sqrt"
+ input: "add_230"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_231/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_231"
+ op: "Add"
+ input: "Sqrt_67"
+ input: "add_231/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_68"
+ op: "RealDiv"
+ input: "add_229"
+ input: "add_231"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_369"
+ op: "Mul"
+ input: "add"
+ input: "truediv_68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_68"
+ op: "Sub"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/read"
+ input: "mul_369"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_400"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta"
+ input: "sub_68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_401"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ input: "add_229"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_402"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ input: "add_230"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_370/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_370"
+ op: "Mul"
+ input: "Mul_370/x"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_371/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_371"
+ op: "Mul"
+ input: "Mul_371/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_232"
+ op: "Add"
+ input: "Mul_370"
+ input: "Mul_371"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_372/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_372"
+ op: "Mul"
+ input: "Mul_372/x"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_68"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_373/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_373"
+ op: "Mul"
+ input: "Mul_373/x"
+ input: "Square_68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_233"
+ op: "Add"
+ input: "Mul_372"
+ input: "Mul_373"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_68"
+ op: "Sqrt"
+ input: "add_233"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_234/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_234"
+ op: "Add"
+ input: "Sqrt_68"
+ input: "add_234/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_69"
+ op: "RealDiv"
+ input: "add_232"
+ input: "add_234"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_374"
+ op: "Mul"
+ input: "add"
+ input: "truediv_69"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_69"
+ op: "Sub"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/read"
+ input: "mul_374"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_403"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ input: "sub_69"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_404"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ input: "add_232"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_405"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ input: "add_233"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_375/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_375"
+ op: "Mul"
+ input: "Mul_375/x"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_376/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_376"
+ op: "Mul"
+ input: "Mul_376/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_69"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_235"
+ op: "Add"
+ input: "Mul_375"
+ input: "Mul_376"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_377/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_377"
+ op: "Mul"
+ input: "Mul_377/x"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_69"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_69"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_378/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_378"
+ op: "Mul"
+ input: "Mul_378/x"
+ input: "Square_69"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_236"
+ op: "Add"
+ input: "Mul_377"
+ input: "Mul_378"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_69"
+ op: "Sqrt"
+ input: "add_236"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_237/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_237"
+ op: "Add"
+ input: "Sqrt_69"
+ input: "add_237/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_70"
+ op: "RealDiv"
+ input: "add_235"
+ input: "add_237"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_379/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_379"
+ op: "Mul"
+ input: "mul_379/x"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_238"
+ op: "Add"
+ input: "truediv_70"
+ input: "mul_379"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_380"
+ op: "Mul"
+ input: "add"
+ input: "add_238"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_70"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/read"
+ input: "mul_380"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_406"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/kernel"
+ input: "sub_70"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_407"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ input: "add_235"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_408"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ input: "add_236"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_381/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_381"
+ op: "Mul"
+ input: "Mul_381/x"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_382/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_382"
+ op: "Mul"
+ input: "Mul_382/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_70"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_239"
+ op: "Add"
+ input: "Mul_381"
+ input: "Mul_382"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_383/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_383"
+ op: "Mul"
+ input: "Mul_383/x"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_70"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_70"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_384/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_384"
+ op: "Mul"
+ input: "Mul_384/x"
+ input: "Square_70"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_240"
+ op: "Add"
+ input: "Mul_383"
+ input: "Mul_384"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_70"
+ op: "Sqrt"
+ input: "add_240"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_241/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_241"
+ op: "Add"
+ input: "Sqrt_70"
+ input: "add_241/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_71"
+ op: "RealDiv"
+ input: "add_239"
+ input: "add_241"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_385"
+ op: "Mul"
+ input: "add"
+ input: "truediv_71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_71"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/self/query/bias/read"
+ input: "mul_385"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_409"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/bias"
+ input: "sub_71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_410"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ input: "add_239"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_411"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ input: "add_240"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_386/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_386"
+ op: "Mul"
+ input: "Mul_386/x"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_387/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_387"
+ op: "Mul"
+ input: "Mul_387/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_242"
+ op: "Add"
+ input: "Mul_386"
+ input: "Mul_387"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_388/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_388"
+ op: "Mul"
+ input: "Mul_388/x"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_71"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_389/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_389"
+ op: "Mul"
+ input: "Mul_389/x"
+ input: "Square_71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_243"
+ op: "Add"
+ input: "Mul_388"
+ input: "Mul_389"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_71"
+ op: "Sqrt"
+ input: "add_243"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_244/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_244"
+ op: "Add"
+ input: "Sqrt_71"
+ input: "add_244/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_72"
+ op: "RealDiv"
+ input: "add_242"
+ input: "add_244"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_390/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_390"
+ op: "Mul"
+ input: "mul_390/x"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_245"
+ op: "Add"
+ input: "truediv_72"
+ input: "mul_390"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_391"
+ op: "Mul"
+ input: "add"
+ input: "add_245"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_72"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/read"
+ input: "mul_391"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_412"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/kernel"
+ input: "sub_72"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_413"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ input: "add_242"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_414"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ input: "add_243"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_392/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_392"
+ op: "Mul"
+ input: "Mul_392/x"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_393/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_393"
+ op: "Mul"
+ input: "Mul_393/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_72"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_246"
+ op: "Add"
+ input: "Mul_392"
+ input: "Mul_393"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_394/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_394"
+ op: "Mul"
+ input: "Mul_394/x"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_72"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_72"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_395/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_395"
+ op: "Mul"
+ input: "Mul_395/x"
+ input: "Square_72"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_247"
+ op: "Add"
+ input: "Mul_394"
+ input: "Mul_395"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_72"
+ op: "Sqrt"
+ input: "add_247"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_248/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_248"
+ op: "Add"
+ input: "Sqrt_72"
+ input: "add_248/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_73"
+ op: "RealDiv"
+ input: "add_246"
+ input: "add_248"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_396"
+ op: "Mul"
+ input: "add"
+ input: "truediv_73"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_73"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/self/key/bias/read"
+ input: "mul_396"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_415"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/bias"
+ input: "sub_73"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_416"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ input: "add_246"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_417"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ input: "add_247"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_397/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_397"
+ op: "Mul"
+ input: "Mul_397/x"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_398/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_398"
+ op: "Mul"
+ input: "Mul_398/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_73"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_249"
+ op: "Add"
+ input: "Mul_397"
+ input: "Mul_398"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_399/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_399"
+ op: "Mul"
+ input: "Mul_399/x"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_73"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_73"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_400/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_400"
+ op: "Mul"
+ input: "Mul_400/x"
+ input: "Square_73"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_250"
+ op: "Add"
+ input: "Mul_399"
+ input: "Mul_400"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_73"
+ op: "Sqrt"
+ input: "add_250"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_251/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_251"
+ op: "Add"
+ input: "Sqrt_73"
+ input: "add_251/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_74"
+ op: "RealDiv"
+ input: "add_249"
+ input: "add_251"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_401/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_401"
+ op: "Mul"
+ input: "mul_401/x"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_252"
+ op: "Add"
+ input: "truediv_74"
+ input: "mul_401"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_402"
+ op: "Mul"
+ input: "add"
+ input: "add_252"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_74"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/read"
+ input: "mul_402"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_418"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/kernel"
+ input: "sub_74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_419"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ input: "add_249"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_420"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ input: "add_250"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_403/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_403"
+ op: "Mul"
+ input: "Mul_403/x"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_404/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_404"
+ op: "Mul"
+ input: "Mul_404/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_253"
+ op: "Add"
+ input: "Mul_403"
+ input: "Mul_404"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_405/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_405"
+ op: "Mul"
+ input: "Mul_405/x"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_74"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_406/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_406"
+ op: "Mul"
+ input: "Mul_406/x"
+ input: "Square_74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_254"
+ op: "Add"
+ input: "Mul_405"
+ input: "Mul_406"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_74"
+ op: "Sqrt"
+ input: "add_254"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_255/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_255"
+ op: "Add"
+ input: "Sqrt_74"
+ input: "add_255/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_75"
+ op: "RealDiv"
+ input: "add_253"
+ input: "add_255"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_407"
+ op: "Mul"
+ input: "add"
+ input: "truediv_75"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_75"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/self/value/bias/read"
+ input: "mul_407"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_421"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/bias"
+ input: "sub_75"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_422"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ input: "add_253"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_423"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ input: "add_254"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_408/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_408"
+ op: "Mul"
+ input: "Mul_408/x"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_409/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_409"
+ op: "Mul"
+ input: "Mul_409/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_75"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_256"
+ op: "Add"
+ input: "Mul_408"
+ input: "Mul_409"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_410/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_410"
+ op: "Mul"
+ input: "Mul_410/x"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_75"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_75"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_411/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_411"
+ op: "Mul"
+ input: "Mul_411/x"
+ input: "Square_75"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_257"
+ op: "Add"
+ input: "Mul_410"
+ input: "Mul_411"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_75"
+ op: "Sqrt"
+ input: "add_257"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_258/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_258"
+ op: "Add"
+ input: "Sqrt_75"
+ input: "add_258/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_76"
+ op: "RealDiv"
+ input: "add_256"
+ input: "add_258"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_412/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_412"
+ op: "Mul"
+ input: "mul_412/x"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_259"
+ op: "Add"
+ input: "truediv_76"
+ input: "mul_412"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_413"
+ op: "Mul"
+ input: "add"
+ input: "add_259"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_76"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/read"
+ input: "mul_413"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_424"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel"
+ input: "sub_76"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_425"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ input: "add_256"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_426"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ input: "add_257"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_414/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_414"
+ op: "Mul"
+ input: "Mul_414/x"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_415/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_415"
+ op: "Mul"
+ input: "Mul_415/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_76"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_260"
+ op: "Add"
+ input: "Mul_414"
+ input: "Mul_415"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_416/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_416"
+ op: "Mul"
+ input: "Mul_416/x"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_76"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_76"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_417/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_417"
+ op: "Mul"
+ input: "Mul_417/x"
+ input: "Square_76"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_261"
+ op: "Add"
+ input: "Mul_416"
+ input: "Mul_417"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_76"
+ op: "Sqrt"
+ input: "add_261"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_262/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_262"
+ op: "Add"
+ input: "Sqrt_76"
+ input: "add_262/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_77"
+ op: "RealDiv"
+ input: "add_260"
+ input: "add_262"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_418"
+ op: "Mul"
+ input: "add"
+ input: "truediv_77"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_77"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/read"
+ input: "mul_418"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_427"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/bias"
+ input: "sub_77"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_428"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ input: "add_260"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_429"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ input: "add_261"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_419/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_419"
+ op: "Mul"
+ input: "Mul_419/x"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_420/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_420"
+ op: "Mul"
+ input: "Mul_420/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_77"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_263"
+ op: "Add"
+ input: "Mul_419"
+ input: "Mul_420"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_421/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_421"
+ op: "Mul"
+ input: "Mul_421/x"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_77"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_77"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_422/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_422"
+ op: "Mul"
+ input: "Mul_422/x"
+ input: "Square_77"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_264"
+ op: "Add"
+ input: "Mul_421"
+ input: "Mul_422"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_77"
+ op: "Sqrt"
+ input: "add_264"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_265/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_265"
+ op: "Add"
+ input: "Sqrt_77"
+ input: "add_265/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_78"
+ op: "RealDiv"
+ input: "add_263"
+ input: "add_265"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_423"
+ op: "Mul"
+ input: "add"
+ input: "truediv_78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_78"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/read"
+ input: "mul_423"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_430"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ input: "sub_78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_431"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ input: "add_263"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_432"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ input: "add_264"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_424/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_424"
+ op: "Mul"
+ input: "Mul_424/x"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_425/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_425"
+ op: "Mul"
+ input: "Mul_425/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_266"
+ op: "Add"
+ input: "Mul_424"
+ input: "Mul_425"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_426/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_426"
+ op: "Mul"
+ input: "Mul_426/x"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_78"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_427/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_427"
+ op: "Mul"
+ input: "Mul_427/x"
+ input: "Square_78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_267"
+ op: "Add"
+ input: "Mul_426"
+ input: "Mul_427"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_78"
+ op: "Sqrt"
+ input: "add_267"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_268/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_268"
+ op: "Add"
+ input: "Sqrt_78"
+ input: "add_268/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_79"
+ op: "RealDiv"
+ input: "add_266"
+ input: "add_268"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_428"
+ op: "Mul"
+ input: "add"
+ input: "truediv_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_79"
+ op: "Sub"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read"
+ input: "mul_428"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_433"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ input: "sub_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_434"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_266"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_435"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_267"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_429/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_429"
+ op: "Mul"
+ input: "Mul_429/x"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_430/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_430"
+ op: "Mul"
+ input: "Mul_430/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_269"
+ op: "Add"
+ input: "Mul_429"
+ input: "Mul_430"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_431/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_431"
+ op: "Mul"
+ input: "Mul_431/x"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_79"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_432/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_432"
+ op: "Mul"
+ input: "Mul_432/x"
+ input: "Square_79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_270"
+ op: "Add"
+ input: "Mul_431"
+ input: "Mul_432"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_79"
+ op: "Sqrt"
+ input: "add_270"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_271/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_271"
+ op: "Add"
+ input: "Sqrt_79"
+ input: "add_271/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_80"
+ op: "RealDiv"
+ input: "add_269"
+ input: "add_271"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_433/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_433"
+ op: "Mul"
+ input: "mul_433/x"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_272"
+ op: "Add"
+ input: "truediv_80"
+ input: "mul_433"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_434"
+ op: "Mul"
+ input: "add"
+ input: "add_272"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_80"
+ op: "Sub"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/read"
+ input: "mul_434"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_436"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel"
+ input: "sub_80"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_437"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ input: "add_269"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_438"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ input: "add_270"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_435/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_435"
+ op: "Mul"
+ input: "Mul_435/x"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_436/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_436"
+ op: "Mul"
+ input: "Mul_436/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_80"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_273"
+ op: "Add"
+ input: "Mul_435"
+ input: "Mul_436"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_437/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_437"
+ op: "Mul"
+ input: "Mul_437/x"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_80"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_80"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_438/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_438"
+ op: "Mul"
+ input: "Mul_438/x"
+ input: "Square_80"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_274"
+ op: "Add"
+ input: "Mul_437"
+ input: "Mul_438"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_80"
+ op: "Sqrt"
+ input: "add_274"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_275/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_275"
+ op: "Add"
+ input: "Sqrt_80"
+ input: "add_275/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_81"
+ op: "RealDiv"
+ input: "add_273"
+ input: "add_275"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_439"
+ op: "Mul"
+ input: "add"
+ input: "truediv_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_81"
+ op: "Sub"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/read"
+ input: "mul_439"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_439"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/bias"
+ input: "sub_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_440"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ input: "add_273"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_441"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ input: "add_274"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_440/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_440"
+ op: "Mul"
+ input: "Mul_440/x"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_441/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_441"
+ op: "Mul"
+ input: "Mul_441/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_276"
+ op: "Add"
+ input: "Mul_440"
+ input: "Mul_441"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_442/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_442"
+ op: "Mul"
+ input: "Mul_442/x"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_81"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_443/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_443"
+ op: "Mul"
+ input: "Mul_443/x"
+ input: "Square_81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_277"
+ op: "Add"
+ input: "Mul_442"
+ input: "Mul_443"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_81"
+ op: "Sqrt"
+ input: "add_277"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_278/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_278"
+ op: "Add"
+ input: "Sqrt_81"
+ input: "add_278/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_82"
+ op: "RealDiv"
+ input: "add_276"
+ input: "add_278"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_444/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_444"
+ op: "Mul"
+ input: "mul_444/x"
+ input: "bert/encoder/layer_4/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_279"
+ op: "Add"
+ input: "truediv_82"
+ input: "mul_444"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_445"
+ op: "Mul"
+ input: "add"
+ input: "add_279"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_82"
+ op: "Sub"
+ input: "bert/encoder/layer_4/output/dense/kernel/read"
+ input: "mul_445"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_442"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/kernel"
+ input: "sub_82"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_443"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ input: "add_276"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_444"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ input: "add_277"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_446/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_446"
+ op: "Mul"
+ input: "Mul_446/x"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_447/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_447"
+ op: "Mul"
+ input: "Mul_447/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_82"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_280"
+ op: "Add"
+ input: "Mul_446"
+ input: "Mul_447"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_448/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_448"
+ op: "Mul"
+ input: "Mul_448/x"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_82"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_82"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_449/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_449"
+ op: "Mul"
+ input: "Mul_449/x"
+ input: "Square_82"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_281"
+ op: "Add"
+ input: "Mul_448"
+ input: "Mul_449"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_82"
+ op: "Sqrt"
+ input: "add_281"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_282/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_282"
+ op: "Add"
+ input: "Sqrt_82"
+ input: "add_282/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_83"
+ op: "RealDiv"
+ input: "add_280"
+ input: "add_282"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_450"
+ op: "Mul"
+ input: "add"
+ input: "truediv_83"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_83"
+ op: "Sub"
+ input: "bert/encoder/layer_4/output/dense/bias/read"
+ input: "mul_450"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_445"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/bias"
+ input: "sub_83"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_446"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ input: "add_280"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_447"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ input: "add_281"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_451/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_451"
+ op: "Mul"
+ input: "Mul_451/x"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_452/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_452"
+ op: "Mul"
+ input: "Mul_452/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_83"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_283"
+ op: "Add"
+ input: "Mul_451"
+ input: "Mul_452"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_453/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_453"
+ op: "Mul"
+ input: "Mul_453/x"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_83"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_83"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_454/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_454"
+ op: "Mul"
+ input: "Mul_454/x"
+ input: "Square_83"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_284"
+ op: "Add"
+ input: "Mul_453"
+ input: "Mul_454"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_83"
+ op: "Sqrt"
+ input: "add_284"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_285/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_285"
+ op: "Add"
+ input: "Sqrt_83"
+ input: "add_285/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_84"
+ op: "RealDiv"
+ input: "add_283"
+ input: "add_285"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_455"
+ op: "Mul"
+ input: "add"
+ input: "truediv_84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_84"
+ op: "Sub"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/read"
+ input: "mul_455"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_448"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta"
+ input: "sub_84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_449"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ input: "add_283"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_450"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ input: "add_284"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_456/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_456"
+ op: "Mul"
+ input: "Mul_456/x"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_457/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_457"
+ op: "Mul"
+ input: "Mul_457/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_286"
+ op: "Add"
+ input: "Mul_456"
+ input: "Mul_457"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_458/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_458"
+ op: "Mul"
+ input: "Mul_458/x"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_84"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_459/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_459"
+ op: "Mul"
+ input: "Mul_459/x"
+ input: "Square_84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_287"
+ op: "Add"
+ input: "Mul_458"
+ input: "Mul_459"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_84"
+ op: "Sqrt"
+ input: "add_287"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_288/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_288"
+ op: "Add"
+ input: "Sqrt_84"
+ input: "add_288/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_85"
+ op: "RealDiv"
+ input: "add_286"
+ input: "add_288"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_460"
+ op: "Mul"
+ input: "add"
+ input: "truediv_85"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_85"
+ op: "Sub"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/read"
+ input: "mul_460"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_451"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ input: "sub_85"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_452"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ input: "add_286"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_453"
+ op: "Assign"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ input: "add_287"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_461/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_461"
+ op: "Mul"
+ input: "Mul_461/x"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_462/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_462"
+ op: "Mul"
+ input: "Mul_462/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_85"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_289"
+ op: "Add"
+ input: "Mul_461"
+ input: "Mul_462"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_463/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_463"
+ op: "Mul"
+ input: "Mul_463/x"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_85"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_85"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_464/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_464"
+ op: "Mul"
+ input: "Mul_464/x"
+ input: "Square_85"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_290"
+ op: "Add"
+ input: "Mul_463"
+ input: "Mul_464"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_85"
+ op: "Sqrt"
+ input: "add_290"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_291/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_291"
+ op: "Add"
+ input: "Sqrt_85"
+ input: "add_291/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_86"
+ op: "RealDiv"
+ input: "add_289"
+ input: "add_291"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_465/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_465"
+ op: "Mul"
+ input: "mul_465/x"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_292"
+ op: "Add"
+ input: "truediv_86"
+ input: "mul_465"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_466"
+ op: "Mul"
+ input: "add"
+ input: "add_292"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_86"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/read"
+ input: "mul_466"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_454"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/kernel"
+ input: "sub_86"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_455"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ input: "add_289"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_456"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ input: "add_290"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_467/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_467"
+ op: "Mul"
+ input: "Mul_467/x"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_468/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_468"
+ op: "Mul"
+ input: "Mul_468/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_86"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_293"
+ op: "Add"
+ input: "Mul_467"
+ input: "Mul_468"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_469/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_469"
+ op: "Mul"
+ input: "Mul_469/x"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_86"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_86"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_470/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_470"
+ op: "Mul"
+ input: "Mul_470/x"
+ input: "Square_86"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_294"
+ op: "Add"
+ input: "Mul_469"
+ input: "Mul_470"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_86"
+ op: "Sqrt"
+ input: "add_294"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_295/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_295"
+ op: "Add"
+ input: "Sqrt_86"
+ input: "add_295/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_87"
+ op: "RealDiv"
+ input: "add_293"
+ input: "add_295"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_471"
+ op: "Mul"
+ input: "add"
+ input: "truediv_87"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_87"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/self/query/bias/read"
+ input: "mul_471"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_457"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/bias"
+ input: "sub_87"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_458"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ input: "add_293"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_459"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ input: "add_294"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_472/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_472"
+ op: "Mul"
+ input: "Mul_472/x"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_473/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_473"
+ op: "Mul"
+ input: "Mul_473/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_87"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_296"
+ op: "Add"
+ input: "Mul_472"
+ input: "Mul_473"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_474/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_474"
+ op: "Mul"
+ input: "Mul_474/x"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_87"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_87"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_475/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_475"
+ op: "Mul"
+ input: "Mul_475/x"
+ input: "Square_87"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_297"
+ op: "Add"
+ input: "Mul_474"
+ input: "Mul_475"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_87"
+ op: "Sqrt"
+ input: "add_297"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_298/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_298"
+ op: "Add"
+ input: "Sqrt_87"
+ input: "add_298/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_88"
+ op: "RealDiv"
+ input: "add_296"
+ input: "add_298"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_476/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_476"
+ op: "Mul"
+ input: "mul_476/x"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_299"
+ op: "Add"
+ input: "truediv_88"
+ input: "mul_476"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_477"
+ op: "Mul"
+ input: "add"
+ input: "add_299"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_88"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/read"
+ input: "mul_477"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_460"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/kernel"
+ input: "sub_88"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_461"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ input: "add_296"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_462"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ input: "add_297"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_478/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_478"
+ op: "Mul"
+ input: "Mul_478/x"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_479/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_479"
+ op: "Mul"
+ input: "Mul_479/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_88"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_300"
+ op: "Add"
+ input: "Mul_478"
+ input: "Mul_479"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_480/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_480"
+ op: "Mul"
+ input: "Mul_480/x"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_88"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_88"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_481/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_481"
+ op: "Mul"
+ input: "Mul_481/x"
+ input: "Square_88"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_301"
+ op: "Add"
+ input: "Mul_480"
+ input: "Mul_481"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_88"
+ op: "Sqrt"
+ input: "add_301"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_302/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_302"
+ op: "Add"
+ input: "Sqrt_88"
+ input: "add_302/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_89"
+ op: "RealDiv"
+ input: "add_300"
+ input: "add_302"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_482"
+ op: "Mul"
+ input: "add"
+ input: "truediv_89"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_89"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/self/key/bias/read"
+ input: "mul_482"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_463"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/bias"
+ input: "sub_89"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_464"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ input: "add_300"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_465"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ input: "add_301"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_483/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_483"
+ op: "Mul"
+ input: "Mul_483/x"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_484/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_484"
+ op: "Mul"
+ input: "Mul_484/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_89"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_303"
+ op: "Add"
+ input: "Mul_483"
+ input: "Mul_484"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_485/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_485"
+ op: "Mul"
+ input: "Mul_485/x"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_89"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_89"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_486/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_486"
+ op: "Mul"
+ input: "Mul_486/x"
+ input: "Square_89"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_304"
+ op: "Add"
+ input: "Mul_485"
+ input: "Mul_486"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_89"
+ op: "Sqrt"
+ input: "add_304"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_305/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_305"
+ op: "Add"
+ input: "Sqrt_89"
+ input: "add_305/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_90"
+ op: "RealDiv"
+ input: "add_303"
+ input: "add_305"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_487/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_487"
+ op: "Mul"
+ input: "mul_487/x"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_306"
+ op: "Add"
+ input: "truediv_90"
+ input: "mul_487"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_488"
+ op: "Mul"
+ input: "add"
+ input: "add_306"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_90"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/read"
+ input: "mul_488"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_466"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/kernel"
+ input: "sub_90"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_467"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ input: "add_303"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_468"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ input: "add_304"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_489/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_489"
+ op: "Mul"
+ input: "Mul_489/x"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_490/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_490"
+ op: "Mul"
+ input: "Mul_490/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_90"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_307"
+ op: "Add"
+ input: "Mul_489"
+ input: "Mul_490"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_491/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_491"
+ op: "Mul"
+ input: "Mul_491/x"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_90"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_90"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_492/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_492"
+ op: "Mul"
+ input: "Mul_492/x"
+ input: "Square_90"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_308"
+ op: "Add"
+ input: "Mul_491"
+ input: "Mul_492"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_90"
+ op: "Sqrt"
+ input: "add_308"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_309/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_309"
+ op: "Add"
+ input: "Sqrt_90"
+ input: "add_309/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_91"
+ op: "RealDiv"
+ input: "add_307"
+ input: "add_309"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_493"
+ op: "Mul"
+ input: "add"
+ input: "truediv_91"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_91"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/self/value/bias/read"
+ input: "mul_493"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_469"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/bias"
+ input: "sub_91"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_470"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ input: "add_307"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_471"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ input: "add_308"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_494/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_494"
+ op: "Mul"
+ input: "Mul_494/x"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_495/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_495"
+ op: "Mul"
+ input: "Mul_495/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_91"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_310"
+ op: "Add"
+ input: "Mul_494"
+ input: "Mul_495"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_496/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_496"
+ op: "Mul"
+ input: "Mul_496/x"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_91"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_91"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_497/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_497"
+ op: "Mul"
+ input: "Mul_497/x"
+ input: "Square_91"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_311"
+ op: "Add"
+ input: "Mul_496"
+ input: "Mul_497"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_91"
+ op: "Sqrt"
+ input: "add_311"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_312/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_312"
+ op: "Add"
+ input: "Sqrt_91"
+ input: "add_312/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_92"
+ op: "RealDiv"
+ input: "add_310"
+ input: "add_312"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_498/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_498"
+ op: "Mul"
+ input: "mul_498/x"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_313"
+ op: "Add"
+ input: "truediv_92"
+ input: "mul_498"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_499"
+ op: "Mul"
+ input: "add"
+ input: "add_313"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_92"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/read"
+ input: "mul_499"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_472"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel"
+ input: "sub_92"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_473"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ input: "add_310"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_474"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ input: "add_311"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_500/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_500"
+ op: "Mul"
+ input: "Mul_500/x"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_501/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_501"
+ op: "Mul"
+ input: "Mul_501/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_92"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_314"
+ op: "Add"
+ input: "Mul_500"
+ input: "Mul_501"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_502/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_502"
+ op: "Mul"
+ input: "Mul_502/x"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_92"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_92"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_503/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_503"
+ op: "Mul"
+ input: "Mul_503/x"
+ input: "Square_92"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_315"
+ op: "Add"
+ input: "Mul_502"
+ input: "Mul_503"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_92"
+ op: "Sqrt"
+ input: "add_315"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_316/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_316"
+ op: "Add"
+ input: "Sqrt_92"
+ input: "add_316/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_93"
+ op: "RealDiv"
+ input: "add_314"
+ input: "add_316"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_504"
+ op: "Mul"
+ input: "add"
+ input: "truediv_93"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_93"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/read"
+ input: "mul_504"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_475"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/bias"
+ input: "sub_93"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_476"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ input: "add_314"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_477"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ input: "add_315"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_505/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_505"
+ op: "Mul"
+ input: "Mul_505/x"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_506/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_506"
+ op: "Mul"
+ input: "Mul_506/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_93"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_317"
+ op: "Add"
+ input: "Mul_505"
+ input: "Mul_506"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_507/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_507"
+ op: "Mul"
+ input: "Mul_507/x"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_93"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_93"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_508/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_508"
+ op: "Mul"
+ input: "Mul_508/x"
+ input: "Square_93"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_318"
+ op: "Add"
+ input: "Mul_507"
+ input: "Mul_508"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_93"
+ op: "Sqrt"
+ input: "add_318"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_319/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_319"
+ op: "Add"
+ input: "Sqrt_93"
+ input: "add_319/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_94"
+ op: "RealDiv"
+ input: "add_317"
+ input: "add_319"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_509"
+ op: "Mul"
+ input: "add"
+ input: "truediv_94"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_94"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/read"
+ input: "mul_509"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_478"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ input: "sub_94"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_479"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ input: "add_317"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_480"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ input: "add_318"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_510/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_510"
+ op: "Mul"
+ input: "Mul_510/x"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_511/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_511"
+ op: "Mul"
+ input: "Mul_511/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_94"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_320"
+ op: "Add"
+ input: "Mul_510"
+ input: "Mul_511"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_512/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_512"
+ op: "Mul"
+ input: "Mul_512/x"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_94"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_94"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_513/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_513"
+ op: "Mul"
+ input: "Mul_513/x"
+ input: "Square_94"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_321"
+ op: "Add"
+ input: "Mul_512"
+ input: "Mul_513"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_94"
+ op: "Sqrt"
+ input: "add_321"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_322/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_322"
+ op: "Add"
+ input: "Sqrt_94"
+ input: "add_322/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_95"
+ op: "RealDiv"
+ input: "add_320"
+ input: "add_322"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_514"
+ op: "Mul"
+ input: "add"
+ input: "truediv_95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_95"
+ op: "Sub"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read"
+ input: "mul_514"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_481"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ input: "sub_95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_482"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_320"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_483"
+ op: "Assign"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_321"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_515/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_515"
+ op: "Mul"
+ input: "Mul_515/x"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_516/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_516"
+ op: "Mul"
+ input: "Mul_516/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_323"
+ op: "Add"
+ input: "Mul_515"
+ input: "Mul_516"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_517/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_517"
+ op: "Mul"
+ input: "Mul_517/x"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_95"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_518/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_518"
+ op: "Mul"
+ input: "Mul_518/x"
+ input: "Square_95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_324"
+ op: "Add"
+ input: "Mul_517"
+ input: "Mul_518"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_95"
+ op: "Sqrt"
+ input: "add_324"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_325/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_325"
+ op: "Add"
+ input: "Sqrt_95"
+ input: "add_325/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_96"
+ op: "RealDiv"
+ input: "add_323"
+ input: "add_325"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_519/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_519"
+ op: "Mul"
+ input: "mul_519/x"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_326"
+ op: "Add"
+ input: "truediv_96"
+ input: "mul_519"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_520"
+ op: "Mul"
+ input: "add"
+ input: "add_326"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_96"
+ op: "Sub"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/read"
+ input: "mul_520"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_484"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel"
+ input: "sub_96"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_485"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ input: "add_323"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_486"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ input: "add_324"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_521/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_521"
+ op: "Mul"
+ input: "Mul_521/x"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_522/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_522"
+ op: "Mul"
+ input: "Mul_522/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_96"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_327"
+ op: "Add"
+ input: "Mul_521"
+ input: "Mul_522"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_523/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_523"
+ op: "Mul"
+ input: "Mul_523/x"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_96"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_96"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_524/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_524"
+ op: "Mul"
+ input: "Mul_524/x"
+ input: "Square_96"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_328"
+ op: "Add"
+ input: "Mul_523"
+ input: "Mul_524"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_96"
+ op: "Sqrt"
+ input: "add_328"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_329/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_329"
+ op: "Add"
+ input: "Sqrt_96"
+ input: "add_329/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_97"
+ op: "RealDiv"
+ input: "add_327"
+ input: "add_329"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_525"
+ op: "Mul"
+ input: "add"
+ input: "truediv_97"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_97"
+ op: "Sub"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/read"
+ input: "mul_525"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_487"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/bias"
+ input: "sub_97"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_488"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ input: "add_327"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_489"
+ op: "Assign"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ input: "add_328"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_526/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_526"
+ op: "Mul"
+ input: "Mul_526/x"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_527/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_527"
+ op: "Mul"
+ input: "Mul_527/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_97"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_330"
+ op: "Add"
+ input: "Mul_526"
+ input: "Mul_527"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_528/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_528"
+ op: "Mul"
+ input: "Mul_528/x"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_97"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_97"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_529/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_529"
+ op: "Mul"
+ input: "Mul_529/x"
+ input: "Square_97"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_331"
+ op: "Add"
+ input: "Mul_528"
+ input: "Mul_529"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_97"
+ op: "Sqrt"
+ input: "add_331"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_332/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_332"
+ op: "Add"
+ input: "Sqrt_97"
+ input: "add_332/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_98"
+ op: "RealDiv"
+ input: "add_330"
+ input: "add_332"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_530/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_530"
+ op: "Mul"
+ input: "mul_530/x"
+ input: "bert/encoder/layer_5/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_333"
+ op: "Add"
+ input: "truediv_98"
+ input: "mul_530"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_531"
+ op: "Mul"
+ input: "add"
+ input: "add_333"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_98"
+ op: "Sub"
+ input: "bert/encoder/layer_5/output/dense/kernel/read"
+ input: "mul_531"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_490"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/kernel"
+ input: "sub_98"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_491"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ input: "add_330"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_492"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ input: "add_331"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_532/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_532"
+ op: "Mul"
+ input: "Mul_532/x"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_533/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_533"
+ op: "Mul"
+ input: "Mul_533/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_98"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_334"
+ op: "Add"
+ input: "Mul_532"
+ input: "Mul_533"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_534/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_534"
+ op: "Mul"
+ input: "Mul_534/x"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_98"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_98"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_535/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_535"
+ op: "Mul"
+ input: "Mul_535/x"
+ input: "Square_98"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_335"
+ op: "Add"
+ input: "Mul_534"
+ input: "Mul_535"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_98"
+ op: "Sqrt"
+ input: "add_335"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_336/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_336"
+ op: "Add"
+ input: "Sqrt_98"
+ input: "add_336/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_99"
+ op: "RealDiv"
+ input: "add_334"
+ input: "add_336"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_536"
+ op: "Mul"
+ input: "add"
+ input: "truediv_99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_99"
+ op: "Sub"
+ input: "bert/encoder/layer_5/output/dense/bias/read"
+ input: "mul_536"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_493"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/bias"
+ input: "sub_99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_494"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ input: "add_334"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_495"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ input: "add_335"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_537/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_537"
+ op: "Mul"
+ input: "Mul_537/x"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_538/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_538"
+ op: "Mul"
+ input: "Mul_538/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_337"
+ op: "Add"
+ input: "Mul_537"
+ input: "Mul_538"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_539/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_539"
+ op: "Mul"
+ input: "Mul_539/x"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_99"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_540/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_540"
+ op: "Mul"
+ input: "Mul_540/x"
+ input: "Square_99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_338"
+ op: "Add"
+ input: "Mul_539"
+ input: "Mul_540"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_99"
+ op: "Sqrt"
+ input: "add_338"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_339/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_339"
+ op: "Add"
+ input: "Sqrt_99"
+ input: "add_339/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_100"
+ op: "RealDiv"
+ input: "add_337"
+ input: "add_339"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_541"
+ op: "Mul"
+ input: "add"
+ input: "truediv_100"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_100"
+ op: "Sub"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/read"
+ input: "mul_541"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_496"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta"
+ input: "sub_100"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_497"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ input: "add_337"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_498"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ input: "add_338"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_542/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_542"
+ op: "Mul"
+ input: "Mul_542/x"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_543/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_543"
+ op: "Mul"
+ input: "Mul_543/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_100"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_340"
+ op: "Add"
+ input: "Mul_542"
+ input: "Mul_543"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_544/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_544"
+ op: "Mul"
+ input: "Mul_544/x"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_100"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_100"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_545/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_545"
+ op: "Mul"
+ input: "Mul_545/x"
+ input: "Square_100"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_341"
+ op: "Add"
+ input: "Mul_544"
+ input: "Mul_545"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_100"
+ op: "Sqrt"
+ input: "add_341"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_342/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_342"
+ op: "Add"
+ input: "Sqrt_100"
+ input: "add_342/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_101"
+ op: "RealDiv"
+ input: "add_340"
+ input: "add_342"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_546"
+ op: "Mul"
+ input: "add"
+ input: "truediv_101"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_101"
+ op: "Sub"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/read"
+ input: "mul_546"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_499"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ input: "sub_101"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_500"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ input: "add_340"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_501"
+ op: "Assign"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ input: "add_341"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_547/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_547"
+ op: "Mul"
+ input: "Mul_547/x"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_548/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_548"
+ op: "Mul"
+ input: "Mul_548/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_101"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_343"
+ op: "Add"
+ input: "Mul_547"
+ input: "Mul_548"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_549/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_549"
+ op: "Mul"
+ input: "Mul_549/x"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_101"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_101"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_550/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_550"
+ op: "Mul"
+ input: "Mul_550/x"
+ input: "Square_101"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_344"
+ op: "Add"
+ input: "Mul_549"
+ input: "Mul_550"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_101"
+ op: "Sqrt"
+ input: "add_344"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_345/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_345"
+ op: "Add"
+ input: "Sqrt_101"
+ input: "add_345/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_102"
+ op: "RealDiv"
+ input: "add_343"
+ input: "add_345"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_551/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_551"
+ op: "Mul"
+ input: "mul_551/x"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_346"
+ op: "Add"
+ input: "truediv_102"
+ input: "mul_551"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_552"
+ op: "Mul"
+ input: "add"
+ input: "add_346"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_102"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/read"
+ input: "mul_552"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_502"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/kernel"
+ input: "sub_102"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_503"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ input: "add_343"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_504"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ input: "add_344"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_553/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_553"
+ op: "Mul"
+ input: "Mul_553/x"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_554/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_554"
+ op: "Mul"
+ input: "Mul_554/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_102"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_347"
+ op: "Add"
+ input: "Mul_553"
+ input: "Mul_554"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_555/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_555"
+ op: "Mul"
+ input: "Mul_555/x"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_102"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_102"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_556/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_556"
+ op: "Mul"
+ input: "Mul_556/x"
+ input: "Square_102"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_348"
+ op: "Add"
+ input: "Mul_555"
+ input: "Mul_556"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_102"
+ op: "Sqrt"
+ input: "add_348"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_349/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_349"
+ op: "Add"
+ input: "Sqrt_102"
+ input: "add_349/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_103"
+ op: "RealDiv"
+ input: "add_347"
+ input: "add_349"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_557"
+ op: "Mul"
+ input: "add"
+ input: "truediv_103"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_103"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/self/query/bias/read"
+ input: "mul_557"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_505"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/bias"
+ input: "sub_103"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_506"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ input: "add_347"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_507"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ input: "add_348"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_558/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_558"
+ op: "Mul"
+ input: "Mul_558/x"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_559/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_559"
+ op: "Mul"
+ input: "Mul_559/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_103"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_350"
+ op: "Add"
+ input: "Mul_558"
+ input: "Mul_559"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_560/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_560"
+ op: "Mul"
+ input: "Mul_560/x"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_103"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_103"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_561/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_561"
+ op: "Mul"
+ input: "Mul_561/x"
+ input: "Square_103"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_351"
+ op: "Add"
+ input: "Mul_560"
+ input: "Mul_561"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_103"
+ op: "Sqrt"
+ input: "add_351"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_352/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_352"
+ op: "Add"
+ input: "Sqrt_103"
+ input: "add_352/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_104"
+ op: "RealDiv"
+ input: "add_350"
+ input: "add_352"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_562/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_562"
+ op: "Mul"
+ input: "mul_562/x"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_353"
+ op: "Add"
+ input: "truediv_104"
+ input: "mul_562"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_563"
+ op: "Mul"
+ input: "add"
+ input: "add_353"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_104"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/read"
+ input: "mul_563"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_508"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/kernel"
+ input: "sub_104"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_509"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ input: "add_350"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_510"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ input: "add_351"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_564/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_564"
+ op: "Mul"
+ input: "Mul_564/x"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_565/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_565"
+ op: "Mul"
+ input: "Mul_565/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_104"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_354"
+ op: "Add"
+ input: "Mul_564"
+ input: "Mul_565"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_566/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_566"
+ op: "Mul"
+ input: "Mul_566/x"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_104"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_104"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_567/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_567"
+ op: "Mul"
+ input: "Mul_567/x"
+ input: "Square_104"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_355"
+ op: "Add"
+ input: "Mul_566"
+ input: "Mul_567"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_104"
+ op: "Sqrt"
+ input: "add_355"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_356/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_356"
+ op: "Add"
+ input: "Sqrt_104"
+ input: "add_356/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_105"
+ op: "RealDiv"
+ input: "add_354"
+ input: "add_356"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_568"
+ op: "Mul"
+ input: "add"
+ input: "truediv_105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_105"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/self/key/bias/read"
+ input: "mul_568"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_511"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/bias"
+ input: "sub_105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_512"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ input: "add_354"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_513"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ input: "add_355"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_569/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_569"
+ op: "Mul"
+ input: "Mul_569/x"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_570/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_570"
+ op: "Mul"
+ input: "Mul_570/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_357"
+ op: "Add"
+ input: "Mul_569"
+ input: "Mul_570"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_571/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_571"
+ op: "Mul"
+ input: "Mul_571/x"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_105"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_572/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_572"
+ op: "Mul"
+ input: "Mul_572/x"
+ input: "Square_105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_358"
+ op: "Add"
+ input: "Mul_571"
+ input: "Mul_572"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_105"
+ op: "Sqrt"
+ input: "add_358"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_359/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_359"
+ op: "Add"
+ input: "Sqrt_105"
+ input: "add_359/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_106"
+ op: "RealDiv"
+ input: "add_357"
+ input: "add_359"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_573/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_573"
+ op: "Mul"
+ input: "mul_573/x"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_360"
+ op: "Add"
+ input: "truediv_106"
+ input: "mul_573"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_574"
+ op: "Mul"
+ input: "add"
+ input: "add_360"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_106"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/read"
+ input: "mul_574"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_514"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/kernel"
+ input: "sub_106"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_515"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ input: "add_357"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_516"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ input: "add_358"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_575/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_575"
+ op: "Mul"
+ input: "Mul_575/x"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_576/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_576"
+ op: "Mul"
+ input: "Mul_576/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_106"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_361"
+ op: "Add"
+ input: "Mul_575"
+ input: "Mul_576"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_577/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_577"
+ op: "Mul"
+ input: "Mul_577/x"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_106"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_106"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_578/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_578"
+ op: "Mul"
+ input: "Mul_578/x"
+ input: "Square_106"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_362"
+ op: "Add"
+ input: "Mul_577"
+ input: "Mul_578"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_106"
+ op: "Sqrt"
+ input: "add_362"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_363/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_363"
+ op: "Add"
+ input: "Sqrt_106"
+ input: "add_363/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_107"
+ op: "RealDiv"
+ input: "add_361"
+ input: "add_363"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_579"
+ op: "Mul"
+ input: "add"
+ input: "truediv_107"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_107"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/self/value/bias/read"
+ input: "mul_579"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_517"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/bias"
+ input: "sub_107"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_518"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ input: "add_361"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_519"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ input: "add_362"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
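+# The nodes from "bert/encoder/layer_6/attention/self/value/bias/adam_m/Initializer/zeros" through
+# "Assign_519" above form one Adam-style update step for this [768] bias vector, with g being the
+# clipped gradient "clip_by_global_norm/clip_by_global_norm/_106":
+#   m   <- 0.9   * m + 0.1   * g        (Mul_575, Mul_576, add_361)
+#   v   <- 0.999 * v + 0.001 * g^2      (Mul_577, Square_106, Mul_578, add_362)
+#   upd <- m / (sqrt(v) + 1e-6)         (Sqrt_106, add_363, truediv_107)
+#   w   <- w - lr * upd                 (mul_579 scales by the node "add", presumably the
+#                                         learning-rate scalar; sub_107, Assign_517)
+# Assign_518 and Assign_519 write the new m and v back into the adam_m / adam_v slot variables.
+# The same generated pattern repeats below for every trainable variable in the graph.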
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_580/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_580"
+ op: "Mul"
+ input: "Mul_580/x"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_581/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_581"
+ op: "Mul"
+ input: "Mul_581/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_107"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_364"
+ op: "Add"
+ input: "Mul_580"
+ input: "Mul_581"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_582/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_582"
+ op: "Mul"
+ input: "Mul_582/x"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_107"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_107"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_583/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_583"
+ op: "Mul"
+ input: "Mul_583/x"
+ input: "Square_107"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_365"
+ op: "Add"
+ input: "Mul_582"
+ input: "Mul_583"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_107"
+ op: "Sqrt"
+ input: "add_365"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_366/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_366"
+ op: "Add"
+ input: "Sqrt_107"
+ input: "add_366/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_108"
+ op: "RealDiv"
+ input: "add_364"
+ input: "add_366"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_584/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_584"
+ op: "Mul"
+ input: "mul_584/x"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_367"
+ op: "Add"
+ input: "truediv_108"
+ input: "mul_584"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_585"
+ op: "Mul"
+ input: "add"
+ input: "add_367"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_108"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/read"
+ input: "mul_585"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_520"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel"
+ input: "sub_108"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_521"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ input: "add_364"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_522"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ input: "add_365"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
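+# For kernel (weight-matrix) parameters such as this [768, 768] dense kernel, the update additionally
+# applies an L2 weight-decay term before the learning-rate scaling:
+#   upd <- m / (sqrt(v) + 1e-6) + 0.01 * w   (truediv_108, mul_584 with constant 0.01, add_367)
+#   w   <- w - lr * upd                      (mul_585, sub_108, Assign_520)
+# Bias and LayerNorm parameters in the surrounding clusters omit the 0.01 * w term, consistent with
+# excluding them from weight decay.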
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_586/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_586"
+ op: "Mul"
+ input: "Mul_586/x"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_587/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_587"
+ op: "Mul"
+ input: "Mul_587/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_108"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_368"
+ op: "Add"
+ input: "Mul_586"
+ input: "Mul_587"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_588/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_588"
+ op: "Mul"
+ input: "Mul_588/x"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_108"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_108"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_589/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_589"
+ op: "Mul"
+ input: "Mul_589/x"
+ input: "Square_108"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_369"
+ op: "Add"
+ input: "Mul_588"
+ input: "Mul_589"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_108"
+ op: "Sqrt"
+ input: "add_369"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_370/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_370"
+ op: "Add"
+ input: "Sqrt_108"
+ input: "add_370/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_109"
+ op: "RealDiv"
+ input: "add_368"
+ input: "add_370"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_590"
+ op: "Mul"
+ input: "add"
+ input: "truediv_109"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_109"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/read"
+ input: "mul_590"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_523"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/bias"
+ input: "sub_109"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_524"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ input: "add_368"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_525"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ input: "add_369"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_591/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_591"
+ op: "Mul"
+ input: "Mul_591/x"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_592/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_592"
+ op: "Mul"
+ input: "Mul_592/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_109"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_371"
+ op: "Add"
+ input: "Mul_591"
+ input: "Mul_592"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_593/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_593"
+ op: "Mul"
+ input: "Mul_593/x"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_109"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_109"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_594/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_594"
+ op: "Mul"
+ input: "Mul_594/x"
+ input: "Square_109"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_372"
+ op: "Add"
+ input: "Mul_593"
+ input: "Mul_594"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_109"
+ op: "Sqrt"
+ input: "add_372"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_373/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_373"
+ op: "Add"
+ input: "Sqrt_109"
+ input: "add_373/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_110"
+ op: "RealDiv"
+ input: "add_371"
+ input: "add_373"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_595"
+ op: "Mul"
+ input: "add"
+ input: "truediv_110"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_110"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/read"
+ input: "mul_595"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_526"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ input: "sub_110"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_527"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ input: "add_371"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_528"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ input: "add_372"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_596/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_596"
+ op: "Mul"
+ input: "Mul_596/x"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_597/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_597"
+ op: "Mul"
+ input: "Mul_597/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_110"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_374"
+ op: "Add"
+ input: "Mul_596"
+ input: "Mul_597"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_598/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_598"
+ op: "Mul"
+ input: "Mul_598/x"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_110"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_110"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_599/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_599"
+ op: "Mul"
+ input: "Mul_599/x"
+ input: "Square_110"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_375"
+ op: "Add"
+ input: "Mul_598"
+ input: "Mul_599"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_110"
+ op: "Sqrt"
+ input: "add_375"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_376/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_376"
+ op: "Add"
+ input: "Sqrt_110"
+ input: "add_376/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_111"
+ op: "RealDiv"
+ input: "add_374"
+ input: "add_376"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_600"
+ op: "Mul"
+ input: "add"
+ input: "truediv_111"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_111"
+ op: "Sub"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read"
+ input: "mul_600"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_529"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ input: "sub_111"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_530"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_374"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_531"
+ op: "Assign"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_375"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
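+# tensor_content holds the raw little-endian int32 bytes of the shape tensor:
+# "\000\003\000\000" = 768 and "\000\014\000\000" = 3072, i.e. the [768, 3072] shape of this
+# intermediate dense kernel (the [768, 768] kernels above encode "\000\003\000\000\000\003\000\000").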
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_601/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_601"
+ op: "Mul"
+ input: "Mul_601/x"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_602/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_602"
+ op: "Mul"
+ input: "Mul_602/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_111"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_377"
+ op: "Add"
+ input: "Mul_601"
+ input: "Mul_602"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_603/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_603"
+ op: "Mul"
+ input: "Mul_603/x"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_111"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_111"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_604/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_604"
+ op: "Mul"
+ input: "Mul_604/x"
+ input: "Square_111"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_378"
+ op: "Add"
+ input: "Mul_603"
+ input: "Mul_604"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_111"
+ op: "Sqrt"
+ input: "add_378"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_379/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_379"
+ op: "Add"
+ input: "Sqrt_111"
+ input: "add_379/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_112"
+ op: "RealDiv"
+ input: "add_377"
+ input: "add_379"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_605/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_605"
+ op: "Mul"
+ input: "mul_605/x"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_380"
+ op: "Add"
+ input: "truediv_112"
+ input: "mul_605"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_606"
+ op: "Mul"
+ input: "add"
+ input: "add_380"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_112"
+ op: "Sub"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/read"
+ input: "mul_606"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_532"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel"
+ input: "sub_112"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_533"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ input: "add_377"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_534"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ input: "add_378"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_607/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_607"
+ op: "Mul"
+ input: "Mul_607/x"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_608/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_608"
+ op: "Mul"
+ input: "Mul_608/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_112"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_381"
+ op: "Add"
+ input: "Mul_607"
+ input: "Mul_608"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_609/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_609"
+ op: "Mul"
+ input: "Mul_609/x"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_112"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_112"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_610/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_610"
+ op: "Mul"
+ input: "Mul_610/x"
+ input: "Square_112"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_382"
+ op: "Add"
+ input: "Mul_609"
+ input: "Mul_610"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_112"
+ op: "Sqrt"
+ input: "add_382"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_383/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_383"
+ op: "Add"
+ input: "Sqrt_112"
+ input: "add_383/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_113"
+ op: "RealDiv"
+ input: "add_381"
+ input: "add_383"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_611"
+ op: "Mul"
+ input: "add"
+ input: "truediv_113"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_113"
+ op: "Sub"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/read"
+ input: "mul_611"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_535"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/bias"
+ input: "sub_113"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_536"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ input: "add_381"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_537"
+ op: "Assign"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ input: "add_382"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_612/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_612"
+ op: "Mul"
+ input: "Mul_612/x"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_613/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_613"
+ op: "Mul"
+ input: "Mul_613/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_113"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_384"
+ op: "Add"
+ input: "Mul_612"
+ input: "Mul_613"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_614/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_614"
+ op: "Mul"
+ input: "Mul_614/x"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_113"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_113"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_615/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_615"
+ op: "Mul"
+ input: "Mul_615/x"
+ input: "Square_113"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_385"
+ op: "Add"
+ input: "Mul_614"
+ input: "Mul_615"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_113"
+ op: "Sqrt"
+ input: "add_385"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_386/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_386"
+ op: "Add"
+ input: "Sqrt_113"
+ input: "add_386/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_114"
+ op: "RealDiv"
+ input: "add_384"
+ input: "add_386"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_616/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_616"
+ op: "Mul"
+ input: "mul_616/x"
+ input: "bert/encoder/layer_6/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_387"
+ op: "Add"
+ input: "truediv_114"
+ input: "mul_616"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_617"
+ op: "Mul"
+ input: "add"
+ input: "add_387"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_114"
+ op: "Sub"
+ input: "bert/encoder/layer_6/output/dense/kernel/read"
+ input: "mul_617"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_538"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/kernel"
+ input: "sub_114"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_539"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ input: "add_384"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_540"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ input: "add_385"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_618/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_618"
+ op: "Mul"
+ input: "Mul_618/x"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_619/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_619"
+ op: "Mul"
+ input: "Mul_619/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_114"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_388"
+ op: "Add"
+ input: "Mul_618"
+ input: "Mul_619"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_620/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_620"
+ op: "Mul"
+ input: "Mul_620/x"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_114"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_114"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_621/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_621"
+ op: "Mul"
+ input: "Mul_621/x"
+ input: "Square_114"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_389"
+ op: "Add"
+ input: "Mul_620"
+ input: "Mul_621"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_114"
+ op: "Sqrt"
+ input: "add_389"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_390/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_390"
+ op: "Add"
+ input: "Sqrt_114"
+ input: "add_390/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_115"
+ op: "RealDiv"
+ input: "add_388"
+ input: "add_390"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_622"
+ op: "Mul"
+ input: "add"
+ input: "truediv_115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_115"
+ op: "Sub"
+ input: "bert/encoder/layer_6/output/dense/bias/read"
+ input: "mul_622"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_541"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/bias"
+ input: "sub_115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_542"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ input: "add_388"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_543"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ input: "add_389"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_623/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_623"
+ op: "Mul"
+ input: "Mul_623/x"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_624/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_624"
+ op: "Mul"
+ input: "Mul_624/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_391"
+ op: "Add"
+ input: "Mul_623"
+ input: "Mul_624"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_625/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_625"
+ op: "Mul"
+ input: "Mul_625/x"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_115"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_626/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_626"
+ op: "Mul"
+ input: "Mul_626/x"
+ input: "Square_115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_392"
+ op: "Add"
+ input: "Mul_625"
+ input: "Mul_626"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_115"
+ op: "Sqrt"
+ input: "add_392"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_393/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_393"
+ op: "Add"
+ input: "Sqrt_115"
+ input: "add_393/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_116"
+ op: "RealDiv"
+ input: "add_391"
+ input: "add_393"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_627"
+ op: "Mul"
+ input: "add"
+ input: "truediv_116"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_116"
+ op: "Sub"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/read"
+ input: "mul_627"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_544"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta"
+ input: "sub_116"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_545"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ input: "add_391"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_546"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ input: "add_392"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_628/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_628"
+ op: "Mul"
+ input: "Mul_628/x"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_629/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_629"
+ op: "Mul"
+ input: "Mul_629/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_116"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_394"
+ op: "Add"
+ input: "Mul_628"
+ input: "Mul_629"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_630/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_630"
+ op: "Mul"
+ input: "Mul_630/x"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_116"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_116"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_631/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_631"
+ op: "Mul"
+ input: "Mul_631/x"
+ input: "Square_116"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_395"
+ op: "Add"
+ input: "Mul_630"
+ input: "Mul_631"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_116"
+ op: "Sqrt"
+ input: "add_395"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_396/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_396"
+ op: "Add"
+ input: "Sqrt_116"
+ input: "add_396/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_117"
+ op: "RealDiv"
+ input: "add_394"
+ input: "add_396"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_632"
+ op: "Mul"
+ input: "add"
+ input: "truediv_117"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_117"
+ op: "Sub"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/read"
+ input: "mul_632"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_547"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ input: "sub_117"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_548"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ input: "add_394"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_549"
+ op: "Assign"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ input: "add_395"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_633/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_633"
+ op: "Mul"
+ input: "Mul_633/x"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_634/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_634"
+ op: "Mul"
+ input: "Mul_634/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_117"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_397"
+ op: "Add"
+ input: "Mul_633"
+ input: "Mul_634"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_635/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_635"
+ op: "Mul"
+ input: "Mul_635/x"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_117"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_117"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_636/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_636"
+ op: "Mul"
+ input: "Mul_636/x"
+ input: "Square_117"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_398"
+ op: "Add"
+ input: "Mul_635"
+ input: "Mul_636"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_117"
+ op: "Sqrt"
+ input: "add_398"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_399/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_399"
+ op: "Add"
+ input: "Sqrt_117"
+ input: "add_399/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_118"
+ op: "RealDiv"
+ input: "add_397"
+ input: "add_399"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_637/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_637"
+ op: "Mul"
+ input: "mul_637/x"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_400"
+ op: "Add"
+ input: "truediv_118"
+ input: "mul_637"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_638"
+ op: "Mul"
+ input: "add"
+ input: "add_400"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_118"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/read"
+ input: "mul_638"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_550"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/kernel"
+ input: "sub_118"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_551"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ input: "add_397"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_552"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ input: "add_398"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_639/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_639"
+ op: "Mul"
+ input: "Mul_639/x"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_640/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_640"
+ op: "Mul"
+ input: "Mul_640/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_118"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_401"
+ op: "Add"
+ input: "Mul_639"
+ input: "Mul_640"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_641/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_641"
+ op: "Mul"
+ input: "Mul_641/x"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_118"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_118"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_642/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_642"
+ op: "Mul"
+ input: "Mul_642/x"
+ input: "Square_118"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_402"
+ op: "Add"
+ input: "Mul_641"
+ input: "Mul_642"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_118"
+ op: "Sqrt"
+ input: "add_402"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_403/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_403"
+ op: "Add"
+ input: "Sqrt_118"
+ input: "add_403/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_119"
+ op: "RealDiv"
+ input: "add_401"
+ input: "add_403"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_643"
+ op: "Mul"
+ input: "add"
+ input: "truediv_119"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_119"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/self/query/bias/read"
+ input: "mul_643"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_553"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/bias"
+ input: "sub_119"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_554"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ input: "add_401"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_555"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ input: "add_402"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_644/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_644"
+ op: "Mul"
+ input: "Mul_644/x"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_645/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_645"
+ op: "Mul"
+ input: "Mul_645/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_119"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_404"
+ op: "Add"
+ input: "Mul_644"
+ input: "Mul_645"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_646/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_646"
+ op: "Mul"
+ input: "Mul_646/x"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_119"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_119"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_647/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_647"
+ op: "Mul"
+ input: "Mul_647/x"
+ input: "Square_119"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_405"
+ op: "Add"
+ input: "Mul_646"
+ input: "Mul_647"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_119"
+ op: "Sqrt"
+ input: "add_405"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_406/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_406"
+ op: "Add"
+ input: "Sqrt_119"
+ input: "add_406/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_120"
+ op: "RealDiv"
+ input: "add_404"
+ input: "add_406"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_648/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_648"
+ op: "Mul"
+ input: "mul_648/x"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_407"
+ op: "Add"
+ input: "truediv_120"
+ input: "mul_648"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_649"
+ op: "Mul"
+ input: "add"
+ input: "add_407"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_120"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/read"
+ input: "mul_649"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_556"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/kernel"
+ input: "sub_120"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_557"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ input: "add_404"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_558"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ input: "add_405"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_650/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_650"
+ op: "Mul"
+ input: "Mul_650/x"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_651/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_651"
+ op: "Mul"
+ input: "Mul_651/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_120"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_408"
+ op: "Add"
+ input: "Mul_650"
+ input: "Mul_651"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_652/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_652"
+ op: "Mul"
+ input: "Mul_652/x"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_120"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_120"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_653/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_653"
+ op: "Mul"
+ input: "Mul_653/x"
+ input: "Square_120"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_409"
+ op: "Add"
+ input: "Mul_652"
+ input: "Mul_653"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_120"
+ op: "Sqrt"
+ input: "add_409"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_410/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_410"
+ op: "Add"
+ input: "Sqrt_120"
+ input: "add_410/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_121"
+ op: "RealDiv"
+ input: "add_408"
+ input: "add_410"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_654"
+ op: "Mul"
+ input: "add"
+ input: "truediv_121"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_121"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/self/key/bias/read"
+ input: "mul_654"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_559"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/bias"
+ input: "sub_121"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_560"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ input: "add_408"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_561"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ input: "add_409"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_655/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_655"
+ op: "Mul"
+ input: "Mul_655/x"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_656/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_656"
+ op: "Mul"
+ input: "Mul_656/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_121"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_411"
+ op: "Add"
+ input: "Mul_655"
+ input: "Mul_656"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_657/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_657"
+ op: "Mul"
+ input: "Mul_657/x"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_121"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_121"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_658/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_658"
+ op: "Mul"
+ input: "Mul_658/x"
+ input: "Square_121"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_412"
+ op: "Add"
+ input: "Mul_657"
+ input: "Mul_658"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_121"
+ op: "Sqrt"
+ input: "add_412"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_413/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_413"
+ op: "Add"
+ input: "Sqrt_121"
+ input: "add_413/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_122"
+ op: "RealDiv"
+ input: "add_411"
+ input: "add_413"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_659/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_659"
+ op: "Mul"
+ input: "mul_659/x"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_414"
+ op: "Add"
+ input: "truediv_122"
+ input: "mul_659"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_660"
+ op: "Mul"
+ input: "add"
+ input: "add_414"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_122"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/read"
+ input: "mul_660"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_562"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/kernel"
+ input: "sub_122"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_563"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ input: "add_411"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_564"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ input: "add_412"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_661/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_661"
+ op: "Mul"
+ input: "Mul_661/x"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_662/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_662"
+ op: "Mul"
+ input: "Mul_662/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_122"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_415"
+ op: "Add"
+ input: "Mul_661"
+ input: "Mul_662"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_663/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_663"
+ op: "Mul"
+ input: "Mul_663/x"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_122"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_122"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_664/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_664"
+ op: "Mul"
+ input: "Mul_664/x"
+ input: "Square_122"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_416"
+ op: "Add"
+ input: "Mul_663"
+ input: "Mul_664"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_122"
+ op: "Sqrt"
+ input: "add_416"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_417/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_417"
+ op: "Add"
+ input: "Sqrt_122"
+ input: "add_417/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_123"
+ op: "RealDiv"
+ input: "add_415"
+ input: "add_417"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_665"
+ op: "Mul"
+ input: "add"
+ input: "truediv_123"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_123"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/self/value/bias/read"
+ input: "mul_665"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_565"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/bias"
+ input: "sub_123"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_566"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ input: "add_415"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_567"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ input: "add_416"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_666/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_666"
+ op: "Mul"
+ input: "Mul_666/x"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_667/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_667"
+ op: "Mul"
+ input: "Mul_667/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_123"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_418"
+ op: "Add"
+ input: "Mul_666"
+ input: "Mul_667"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_668/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_668"
+ op: "Mul"
+ input: "Mul_668/x"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_123"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_123"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_669/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_669"
+ op: "Mul"
+ input: "Mul_669/x"
+ input: "Square_123"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_419"
+ op: "Add"
+ input: "Mul_668"
+ input: "Mul_669"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_123"
+ op: "Sqrt"
+ input: "add_419"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_420/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_420"
+ op: "Add"
+ input: "Sqrt_123"
+ input: "add_420/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_124"
+ op: "RealDiv"
+ input: "add_418"
+ input: "add_420"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_670/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_670"
+ op: "Mul"
+ input: "mul_670/x"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_421"
+ op: "Add"
+ input: "truediv_124"
+ input: "mul_670"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_671"
+ op: "Mul"
+ input: "add"
+ input: "add_421"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_124"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/read"
+ input: "mul_671"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_568"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel"
+ input: "sub_124"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_569"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ input: "add_418"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_570"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ input: "add_419"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_672/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_672"
+ op: "Mul"
+ input: "Mul_672/x"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_673/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_673"
+ op: "Mul"
+ input: "Mul_673/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_124"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_422"
+ op: "Add"
+ input: "Mul_672"
+ input: "Mul_673"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_674/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_674"
+ op: "Mul"
+ input: "Mul_674/x"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_124"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_124"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_675/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_675"
+ op: "Mul"
+ input: "Mul_675/x"
+ input: "Square_124"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_423"
+ op: "Add"
+ input: "Mul_674"
+ input: "Mul_675"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_124"
+ op: "Sqrt"
+ input: "add_423"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_424/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_424"
+ op: "Add"
+ input: "Sqrt_124"
+ input: "add_424/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_125"
+ op: "RealDiv"
+ input: "add_422"
+ input: "add_424"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_676"
+ op: "Mul"
+ input: "add"
+ input: "truediv_125"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_125"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/read"
+ input: "mul_676"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_571"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/bias"
+ input: "sub_125"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_572"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ input: "add_422"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_573"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ input: "add_423"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_677/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_677"
+ op: "Mul"
+ input: "Mul_677/x"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_678/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_678"
+ op: "Mul"
+ input: "Mul_678/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_125"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_425"
+ op: "Add"
+ input: "Mul_677"
+ input: "Mul_678"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_679/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_679"
+ op: "Mul"
+ input: "Mul_679/x"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_125"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_125"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_680/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_680"
+ op: "Mul"
+ input: "Mul_680/x"
+ input: "Square_125"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_426"
+ op: "Add"
+ input: "Mul_679"
+ input: "Mul_680"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_125"
+ op: "Sqrt"
+ input: "add_426"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_427/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_427"
+ op: "Add"
+ input: "Sqrt_125"
+ input: "add_427/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_126"
+ op: "RealDiv"
+ input: "add_425"
+ input: "add_427"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_681"
+ op: "Mul"
+ input: "add"
+ input: "truediv_126"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_126"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/read"
+ input: "mul_681"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_574"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ input: "sub_126"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_575"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ input: "add_425"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_576"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ input: "add_426"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_682/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_682"
+ op: "Mul"
+ input: "Mul_682/x"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_683/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_683"
+ op: "Mul"
+ input: "Mul_683/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_126"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_428"
+ op: "Add"
+ input: "Mul_682"
+ input: "Mul_683"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_684/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_684"
+ op: "Mul"
+ input: "Mul_684/x"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_126"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_126"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_685/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_685"
+ op: "Mul"
+ input: "Mul_685/x"
+ input: "Square_126"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_429"
+ op: "Add"
+ input: "Mul_684"
+ input: "Mul_685"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_126"
+ op: "Sqrt"
+ input: "add_429"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_430/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_430"
+ op: "Add"
+ input: "Sqrt_126"
+ input: "add_430/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_127"
+ op: "RealDiv"
+ input: "add_428"
+ input: "add_430"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_686"
+ op: "Mul"
+ input: "add"
+ input: "truediv_127"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_127"
+ op: "Sub"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read"
+ input: "mul_686"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_577"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ input: "sub_127"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_578"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_428"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_579"
+ op: "Assign"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_429"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_687/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_687"
+ op: "Mul"
+ input: "Mul_687/x"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_688/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_688"
+ op: "Mul"
+ input: "Mul_688/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_127"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_431"
+ op: "Add"
+ input: "Mul_687"
+ input: "Mul_688"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_689/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_689"
+ op: "Mul"
+ input: "Mul_689/x"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_127"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_127"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_690/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_690"
+ op: "Mul"
+ input: "Mul_690/x"
+ input: "Square_127"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_432"
+ op: "Add"
+ input: "Mul_689"
+ input: "Mul_690"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_127"
+ op: "Sqrt"
+ input: "add_432"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_433/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_433"
+ op: "Add"
+ input: "Sqrt_127"
+ input: "add_433/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_128"
+ op: "RealDiv"
+ input: "add_431"
+ input: "add_433"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_691/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_691"
+ op: "Mul"
+ input: "mul_691/x"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_434"
+ op: "Add"
+ input: "truediv_128"
+ input: "mul_691"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_692"
+ op: "Mul"
+ input: "add"
+ input: "add_434"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_128"
+ op: "Sub"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/read"
+ input: "mul_692"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_580"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel"
+ input: "sub_128"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_581"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ input: "add_431"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_582"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ input: "add_432"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_693/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_693"
+ op: "Mul"
+ input: "Mul_693/x"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_694/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_694"
+ op: "Mul"
+ input: "Mul_694/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_128"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_435"
+ op: "Add"
+ input: "Mul_693"
+ input: "Mul_694"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_695/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_695"
+ op: "Mul"
+ input: "Mul_695/x"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_128"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_128"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_696/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_696"
+ op: "Mul"
+ input: "Mul_696/x"
+ input: "Square_128"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_436"
+ op: "Add"
+ input: "Mul_695"
+ input: "Mul_696"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_128"
+ op: "Sqrt"
+ input: "add_436"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_437/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_437"
+ op: "Add"
+ input: "Sqrt_128"
+ input: "add_437/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_129"
+ op: "RealDiv"
+ input: "add_435"
+ input: "add_437"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_697"
+ op: "Mul"
+ input: "add"
+ input: "truediv_129"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_129"
+ op: "Sub"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/read"
+ input: "mul_697"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_583"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/bias"
+ input: "sub_129"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_584"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ input: "add_435"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_585"
+ op: "Assign"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ input: "add_436"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_698/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_698"
+ op: "Mul"
+ input: "Mul_698/x"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_699/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_699"
+ op: "Mul"
+ input: "Mul_699/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_129"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_438"
+ op: "Add"
+ input: "Mul_698"
+ input: "Mul_699"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_700/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_700"
+ op: "Mul"
+ input: "Mul_700/x"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_129"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_129"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_701/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_701"
+ op: "Mul"
+ input: "Mul_701/x"
+ input: "Square_129"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_439"
+ op: "Add"
+ input: "Mul_700"
+ input: "Mul_701"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_129"
+ op: "Sqrt"
+ input: "add_439"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_440/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_440"
+ op: "Add"
+ input: "Sqrt_129"
+ input: "add_440/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_130"
+ op: "RealDiv"
+ input: "add_438"
+ input: "add_440"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_702/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_702"
+ op: "Mul"
+ input: "mul_702/x"
+ input: "bert/encoder/layer_7/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_441"
+ op: "Add"
+ input: "truediv_130"
+ input: "mul_702"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_703"
+ op: "Mul"
+ input: "add"
+ input: "add_441"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_130"
+ op: "Sub"
+ input: "bert/encoder/layer_7/output/dense/kernel/read"
+ input: "mul_703"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_586"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/kernel"
+ input: "sub_130"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_587"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ input: "add_438"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_588"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ input: "add_439"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_704/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_704"
+ op: "Mul"
+ input: "Mul_704/x"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_705/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_705"
+ op: "Mul"
+ input: "Mul_705/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_130"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_442"
+ op: "Add"
+ input: "Mul_704"
+ input: "Mul_705"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_706/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_706"
+ op: "Mul"
+ input: "Mul_706/x"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_130"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_130"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_707/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_707"
+ op: "Mul"
+ input: "Mul_707/x"
+ input: "Square_130"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_443"
+ op: "Add"
+ input: "Mul_706"
+ input: "Mul_707"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_130"
+ op: "Sqrt"
+ input: "add_443"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_444/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_444"
+ op: "Add"
+ input: "Sqrt_130"
+ input: "add_444/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_131"
+ op: "RealDiv"
+ input: "add_442"
+ input: "add_444"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_708"
+ op: "Mul"
+ input: "add"
+ input: "truediv_131"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_131"
+ op: "Sub"
+ input: "bert/encoder/layer_7/output/dense/bias/read"
+ input: "mul_708"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_589"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/bias"
+ input: "sub_131"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_590"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ input: "add_442"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_591"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ input: "add_443"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_709/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_709"
+ op: "Mul"
+ input: "Mul_709/x"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_710/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_710"
+ op: "Mul"
+ input: "Mul_710/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_131"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_445"
+ op: "Add"
+ input: "Mul_709"
+ input: "Mul_710"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_711/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_711"
+ op: "Mul"
+ input: "Mul_711/x"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_131"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_131"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_712/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_712"
+ op: "Mul"
+ input: "Mul_712/x"
+ input: "Square_131"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_446"
+ op: "Add"
+ input: "Mul_711"
+ input: "Mul_712"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_131"
+ op: "Sqrt"
+ input: "add_446"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_447/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_447"
+ op: "Add"
+ input: "Sqrt_131"
+ input: "add_447/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_132"
+ op: "RealDiv"
+ input: "add_445"
+ input: "add_447"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_713"
+ op: "Mul"
+ input: "add"
+ input: "truediv_132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_132"
+ op: "Sub"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/read"
+ input: "mul_713"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_592"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta"
+ input: "sub_132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_593"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ input: "add_445"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_594"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ input: "add_446"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_714/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_714"
+ op: "Mul"
+ input: "Mul_714/x"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_715/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_715"
+ op: "Mul"
+ input: "Mul_715/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_448"
+ op: "Add"
+ input: "Mul_714"
+ input: "Mul_715"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_716/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_716"
+ op: "Mul"
+ input: "Mul_716/x"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_132"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_717/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_717"
+ op: "Mul"
+ input: "Mul_717/x"
+ input: "Square_132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_449"
+ op: "Add"
+ input: "Mul_716"
+ input: "Mul_717"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_132"
+ op: "Sqrt"
+ input: "add_449"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_450/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_450"
+ op: "Add"
+ input: "Sqrt_132"
+ input: "add_450/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_133"
+ op: "RealDiv"
+ input: "add_448"
+ input: "add_450"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_718"
+ op: "Mul"
+ input: "add"
+ input: "truediv_133"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_133"
+ op: "Sub"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/read"
+ input: "mul_718"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_595"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ input: "sub_133"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_596"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ input: "add_448"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_597"
+ op: "Assign"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ input: "add_449"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_719/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_719"
+ op: "Mul"
+ input: "Mul_719/x"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_720/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_720"
+ op: "Mul"
+ input: "Mul_720/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_133"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_451"
+ op: "Add"
+ input: "Mul_719"
+ input: "Mul_720"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_721/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_721"
+ op: "Mul"
+ input: "Mul_721/x"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_133"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_133"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_722/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_722"
+ op: "Mul"
+ input: "Mul_722/x"
+ input: "Square_133"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_452"
+ op: "Add"
+ input: "Mul_721"
+ input: "Mul_722"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_133"
+ op: "Sqrt"
+ input: "add_452"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_453/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_453"
+ op: "Add"
+ input: "Sqrt_133"
+ input: "add_453/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_134"
+ op: "RealDiv"
+ input: "add_451"
+ input: "add_453"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_723/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_723"
+ op: "Mul"
+ input: "mul_723/x"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_454"
+ op: "Add"
+ input: "truediv_134"
+ input: "mul_723"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_724"
+ op: "Mul"
+ input: "add"
+ input: "add_454"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_134"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/read"
+ input: "mul_724"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_598"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/kernel"
+ input: "sub_134"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_599"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ input: "add_451"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_600"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ input: "add_452"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_725/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_725"
+ op: "Mul"
+ input: "Mul_725/x"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_726/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_726"
+ op: "Mul"
+ input: "Mul_726/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_134"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_455"
+ op: "Add"
+ input: "Mul_725"
+ input: "Mul_726"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_727/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_727"
+ op: "Mul"
+ input: "Mul_727/x"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_134"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_134"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_728/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_728"
+ op: "Mul"
+ input: "Mul_728/x"
+ input: "Square_134"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_456"
+ op: "Add"
+ input: "Mul_727"
+ input: "Mul_728"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_134"
+ op: "Sqrt"
+ input: "add_456"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_457/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_457"
+ op: "Add"
+ input: "Sqrt_134"
+ input: "add_457/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_135"
+ op: "RealDiv"
+ input: "add_455"
+ input: "add_457"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_729"
+ op: "Mul"
+ input: "add"
+ input: "truediv_135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_135"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/self/query/bias/read"
+ input: "mul_729"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_601"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/bias"
+ input: "sub_135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_602"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ input: "add_455"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_603"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ input: "add_456"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
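+# Slot variables for the 2-D kernel .../key/kernel are zero-initialized with Fill:
+# shape_as_tensor holds tensor_content "\000\003\000\000\000\003\000\000", i.e. the
+# little-endian int32 pair [768, 768], and the scalar Const supplies the 0.0 fill value.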
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_730/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_730"
+ op: "Mul"
+ input: "Mul_730/x"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_731/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_731"
+ op: "Mul"
+ input: "Mul_731/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_458"
+ op: "Add"
+ input: "Mul_730"
+ input: "Mul_731"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_732/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_732"
+ op: "Mul"
+ input: "Mul_732/x"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_135"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_733/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_733"
+ op: "Mul"
+ input: "Mul_733/x"
+ input: "Square_135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_459"
+ op: "Add"
+ input: "Mul_732"
+ input: "Mul_733"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_135"
+ op: "Sqrt"
+ input: "add_459"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_460/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_460"
+ op: "Add"
+ input: "Sqrt_135"
+ input: "add_460/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_136"
+ op: "RealDiv"
+ input: "add_458"
+ input: "add_460"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
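+# For kernels, a decoupled weight-decay term is added to the Adam update before the
+# learning-rate scaling: mul_734 = 0.01 * kernel, add_461 = truediv_136 + mul_734,
+# then kernel <- kernel - "add" * add_461 (mul_735, sub_136, Assign_604). Biases
+# above omit this term.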
+node {
+ name: "mul_734/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_734"
+ op: "Mul"
+ input: "mul_734/x"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_461"
+ op: "Add"
+ input: "truediv_136"
+ input: "mul_734"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_735"
+ op: "Mul"
+ input: "add"
+ input: "add_461"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_136"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/read"
+ input: "mul_735"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_604"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/kernel"
+ input: "sub_136"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_605"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ input: "add_458"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_606"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ input: "add_459"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_736/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_736"
+ op: "Mul"
+ input: "Mul_736/x"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_737/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_737"
+ op: "Mul"
+ input: "Mul_737/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_136"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_462"
+ op: "Add"
+ input: "Mul_736"
+ input: "Mul_737"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_738/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_738"
+ op: "Mul"
+ input: "Mul_738/x"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_136"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_136"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_739/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_739"
+ op: "Mul"
+ input: "Mul_739/x"
+ input: "Square_136"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_463"
+ op: "Add"
+ input: "Mul_738"
+ input: "Mul_739"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_136"
+ op: "Sqrt"
+ input: "add_463"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_464/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_464"
+ op: "Add"
+ input: "Sqrt_136"
+ input: "add_464/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_137"
+ op: "RealDiv"
+ input: "add_462"
+ input: "add_464"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_740"
+ op: "Mul"
+ input: "add"
+ input: "truediv_137"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_137"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/self/key/bias/read"
+ input: "mul_740"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_607"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/bias"
+ input: "sub_137"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_608"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ input: "add_462"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_609"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ input: "add_463"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_741/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_741"
+ op: "Mul"
+ input: "Mul_741/x"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_742/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_742"
+ op: "Mul"
+ input: "Mul_742/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_137"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_465"
+ op: "Add"
+ input: "Mul_741"
+ input: "Mul_742"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_743/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_743"
+ op: "Mul"
+ input: "Mul_743/x"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_137"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_137"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_744/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_744"
+ op: "Mul"
+ input: "Mul_744/x"
+ input: "Square_137"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_466"
+ op: "Add"
+ input: "Mul_743"
+ input: "Mul_744"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_137"
+ op: "Sqrt"
+ input: "add_466"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_467/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_467"
+ op: "Add"
+ input: "Sqrt_137"
+ input: "add_467/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_138"
+ op: "RealDiv"
+ input: "add_465"
+ input: "add_467"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_745/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_745"
+ op: "Mul"
+ input: "mul_745/x"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_468"
+ op: "Add"
+ input: "truediv_138"
+ input: "mul_745"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_746"
+ op: "Mul"
+ input: "add"
+ input: "add_468"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_138"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/read"
+ input: "mul_746"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_610"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/kernel"
+ input: "sub_138"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_611"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ input: "add_465"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_612"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ input: "add_466"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_747/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_747"
+ op: "Mul"
+ input: "Mul_747/x"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_748/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_748"
+ op: "Mul"
+ input: "Mul_748/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_138"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_469"
+ op: "Add"
+ input: "Mul_747"
+ input: "Mul_748"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_749/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_749"
+ op: "Mul"
+ input: "Mul_749/x"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_138"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_138"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_750/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_750"
+ op: "Mul"
+ input: "Mul_750/x"
+ input: "Square_138"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_470"
+ op: "Add"
+ input: "Mul_749"
+ input: "Mul_750"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_138"
+ op: "Sqrt"
+ input: "add_470"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_471/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_471"
+ op: "Add"
+ input: "Sqrt_138"
+ input: "add_471/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_139"
+ op: "RealDiv"
+ input: "add_469"
+ input: "add_471"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_751"
+ op: "Mul"
+ input: "add"
+ input: "truediv_139"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_139"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/self/value/bias/read"
+ input: "mul_751"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_613"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/bias"
+ input: "sub_139"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_614"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ input: "add_469"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_615"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ input: "add_470"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
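+# The same slot-variable / update / Assign pattern repeats below for
+# bert/encoder/layer_8/attention/output/dense/kernel (a [768, 768] weight matrix).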
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_752/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_752"
+ op: "Mul"
+ input: "Mul_752/x"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_753/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_753"
+ op: "Mul"
+ input: "Mul_753/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_139"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_472"
+ op: "Add"
+ input: "Mul_752"
+ input: "Mul_753"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_754/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_754"
+ op: "Mul"
+ input: "Mul_754/x"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_139"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_139"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_755/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_755"
+ op: "Mul"
+ input: "Mul_755/x"
+ input: "Square_139"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_473"
+ op: "Add"
+ input: "Mul_754"
+ input: "Mul_755"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_139"
+ op: "Sqrt"
+ input: "add_473"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_474/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_474"
+ op: "Add"
+ input: "Sqrt_139"
+ input: "add_474/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_140"
+ op: "RealDiv"
+ input: "add_472"
+ input: "add_474"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_756/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_756"
+ op: "Mul"
+ input: "mul_756/x"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_475"
+ op: "Add"
+ input: "truediv_140"
+ input: "mul_756"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_757"
+ op: "Mul"
+ input: "add"
+ input: "add_475"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_140"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/read"
+ input: "mul_757"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_616"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel"
+ input: "sub_140"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_617"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ input: "add_472"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_618"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ input: "add_473"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_758/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_758"
+ op: "Mul"
+ input: "Mul_758/x"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_759/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_759"
+ op: "Mul"
+ input: "Mul_759/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_140"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_476"
+ op: "Add"
+ input: "Mul_758"
+ input: "Mul_759"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_760/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_760"
+ op: "Mul"
+ input: "Mul_760/x"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_140"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_140"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_761/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_761"
+ op: "Mul"
+ input: "Mul_761/x"
+ input: "Square_140"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_477"
+ op: "Add"
+ input: "Mul_760"
+ input: "Mul_761"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_140"
+ op: "Sqrt"
+ input: "add_477"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_478/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_478"
+ op: "Add"
+ input: "Sqrt_140"
+ input: "add_478/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_141"
+ op: "RealDiv"
+ input: "add_476"
+ input: "add_478"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_762"
+ op: "Mul"
+ input: "add"
+ input: "truediv_141"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_141"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/read"
+ input: "mul_762"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_619"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/bias"
+ input: "sub_141"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_620"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ input: "add_476"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_621"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ input: "add_477"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_763/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_763"
+ op: "Mul"
+ input: "Mul_763/x"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_764/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_764"
+ op: "Mul"
+ input: "Mul_764/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_141"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_479"
+ op: "Add"
+ input: "Mul_763"
+ input: "Mul_764"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_765/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_765"
+ op: "Mul"
+ input: "Mul_765/x"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_141"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_141"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_766/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_766"
+ op: "Mul"
+ input: "Mul_766/x"
+ input: "Square_141"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_480"
+ op: "Add"
+ input: "Mul_765"
+ input: "Mul_766"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_141"
+ op: "Sqrt"
+ input: "add_480"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_481/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_481"
+ op: "Add"
+ input: "Sqrt_141"
+ input: "add_481/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_142"
+ op: "RealDiv"
+ input: "add_479"
+ input: "add_481"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_767"
+ op: "Mul"
+ input: "add"
+ input: "truediv_142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_142"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/read"
+ input: "mul_767"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_622"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ input: "sub_142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_623"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ input: "add_479"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_624"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ input: "add_480"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_768/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_768"
+ op: "Mul"
+ input: "Mul_768/x"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_769/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_769"
+ op: "Mul"
+ input: "Mul_769/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_482"
+ op: "Add"
+ input: "Mul_768"
+ input: "Mul_769"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_770/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_770"
+ op: "Mul"
+ input: "Mul_770/x"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_142"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_771/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_771"
+ op: "Mul"
+ input: "Mul_771/x"
+ input: "Square_142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_483"
+ op: "Add"
+ input: "Mul_770"
+ input: "Mul_771"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_142"
+ op: "Sqrt"
+ input: "add_483"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_484/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_484"
+ op: "Add"
+ input: "Sqrt_142"
+ input: "add_484/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_143"
+ op: "RealDiv"
+ input: "add_482"
+ input: "add_484"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_772"
+ op: "Mul"
+ input: "add"
+ input: "truediv_143"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_143"
+ op: "Sub"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read"
+ input: "mul_772"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_625"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ input: "sub_143"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_626"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_482"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_627"
+ op: "Assign"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_483"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_773/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_773"
+ op: "Mul"
+ input: "Mul_773/x"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_774/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_774"
+ op: "Mul"
+ input: "Mul_774/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_143"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_485"
+ op: "Add"
+ input: "Mul_773"
+ input: "Mul_774"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_775/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_775"
+ op: "Mul"
+ input: "Mul_775/x"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_143"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_143"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_776/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_776"
+ op: "Mul"
+ input: "Mul_776/x"
+ input: "Square_143"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_486"
+ op: "Add"
+ input: "Mul_775"
+ input: "Mul_776"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_143"
+ op: "Sqrt"
+ input: "add_486"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_487/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_487"
+ op: "Add"
+ input: "Sqrt_143"
+ input: "add_487/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_144"
+ op: "RealDiv"
+ input: "add_485"
+ input: "add_487"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_777/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_777"
+ op: "Mul"
+ input: "mul_777/x"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_488"
+ op: "Add"
+ input: "truediv_144"
+ input: "mul_777"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_778"
+ op: "Mul"
+ input: "add"
+ input: "add_488"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_144"
+ op: "Sub"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/read"
+ input: "mul_778"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_628"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel"
+ input: "sub_144"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_629"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ input: "add_485"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_630"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ input: "add_486"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_779/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_779"
+ op: "Mul"
+ input: "Mul_779/x"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_780/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_780"
+ op: "Mul"
+ input: "Mul_780/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_144"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_489"
+ op: "Add"
+ input: "Mul_779"
+ input: "Mul_780"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_781/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_781"
+ op: "Mul"
+ input: "Mul_781/x"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_144"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_144"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_782/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_782"
+ op: "Mul"
+ input: "Mul_782/x"
+ input: "Square_144"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_490"
+ op: "Add"
+ input: "Mul_781"
+ input: "Mul_782"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_144"
+ op: "Sqrt"
+ input: "add_490"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_491/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_491"
+ op: "Add"
+ input: "Sqrt_144"
+ input: "add_491/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_145"
+ op: "RealDiv"
+ input: "add_489"
+ input: "add_491"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_783"
+ op: "Mul"
+ input: "add"
+ input: "truediv_145"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_145"
+ op: "Sub"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/read"
+ input: "mul_783"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_631"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/bias"
+ input: "sub_145"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_632"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ input: "add_489"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_633"
+ op: "Assign"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ input: "add_490"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_784/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_784"
+ op: "Mul"
+ input: "Mul_784/x"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_785/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_785"
+ op: "Mul"
+ input: "Mul_785/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_145"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_492"
+ op: "Add"
+ input: "Mul_784"
+ input: "Mul_785"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_786/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_786"
+ op: "Mul"
+ input: "Mul_786/x"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_145"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_145"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_787/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_787"
+ op: "Mul"
+ input: "Mul_787/x"
+ input: "Square_145"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_493"
+ op: "Add"
+ input: "Mul_786"
+ input: "Mul_787"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_145"
+ op: "Sqrt"
+ input: "add_493"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_494/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_494"
+ op: "Add"
+ input: "Sqrt_145"
+ input: "add_494/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_146"
+ op: "RealDiv"
+ input: "add_492"
+ input: "add_494"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_788/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_788"
+ op: "Mul"
+ input: "mul_788/x"
+ input: "bert/encoder/layer_8/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_495"
+ op: "Add"
+ input: "truediv_146"
+ input: "mul_788"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_789"
+ op: "Mul"
+ input: "add"
+ input: "add_495"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_146"
+ op: "Sub"
+ input: "bert/encoder/layer_8/output/dense/kernel/read"
+ input: "mul_789"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_634"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/kernel"
+ input: "sub_146"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_635"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ input: "add_492"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_636"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ input: "add_493"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_790/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_790"
+ op: "Mul"
+ input: "Mul_790/x"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_791/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_791"
+ op: "Mul"
+ input: "Mul_791/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_146"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_496"
+ op: "Add"
+ input: "Mul_790"
+ input: "Mul_791"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_792/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_792"
+ op: "Mul"
+ input: "Mul_792/x"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_146"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_146"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_793/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_793"
+ op: "Mul"
+ input: "Mul_793/x"
+ input: "Square_146"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_497"
+ op: "Add"
+ input: "Mul_792"
+ input: "Mul_793"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_146"
+ op: "Sqrt"
+ input: "add_497"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_498/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_498"
+ op: "Add"
+ input: "Sqrt_146"
+ input: "add_498/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_147"
+ op: "RealDiv"
+ input: "add_496"
+ input: "add_498"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_794"
+ op: "Mul"
+ input: "add"
+ input: "truediv_147"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_147"
+ op: "Sub"
+ input: "bert/encoder/layer_8/output/dense/bias/read"
+ input: "mul_794"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_637"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/bias"
+ input: "sub_147"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_638"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ input: "add_496"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_639"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ input: "add_497"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_795/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_795"
+ op: "Mul"
+ input: "Mul_795/x"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_796/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_796"
+ op: "Mul"
+ input: "Mul_796/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_147"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_499"
+ op: "Add"
+ input: "Mul_795"
+ input: "Mul_796"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_797/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_797"
+ op: "Mul"
+ input: "Mul_797/x"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_147"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_147"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_798/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_798"
+ op: "Mul"
+ input: "Mul_798/x"
+ input: "Square_147"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_500"
+ op: "Add"
+ input: "Mul_797"
+ input: "Mul_798"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_147"
+ op: "Sqrt"
+ input: "add_500"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_501/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_501"
+ op: "Add"
+ input: "Sqrt_147"
+ input: "add_501/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_148"
+ op: "RealDiv"
+ input: "add_499"
+ input: "add_501"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_799"
+ op: "Mul"
+ input: "add"
+ input: "truediv_148"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_148"
+ op: "Sub"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/read"
+ input: "mul_799"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_640"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta"
+ input: "sub_148"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_641"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ input: "add_499"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_642"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ input: "add_500"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_800/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_800"
+ op: "Mul"
+ input: "Mul_800/x"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_801/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_801"
+ op: "Mul"
+ input: "Mul_801/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_148"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_502"
+ op: "Add"
+ input: "Mul_800"
+ input: "Mul_801"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_802/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_802"
+ op: "Mul"
+ input: "Mul_802/x"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_148"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_148"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_803/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_803"
+ op: "Mul"
+ input: "Mul_803/x"
+ input: "Square_148"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_503"
+ op: "Add"
+ input: "Mul_802"
+ input: "Mul_803"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_148"
+ op: "Sqrt"
+ input: "add_503"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_504/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_504"
+ op: "Add"
+ input: "Sqrt_148"
+ input: "add_504/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_149"
+ op: "RealDiv"
+ input: "add_502"
+ input: "add_504"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_804"
+ op: "Mul"
+ input: "add"
+ input: "truediv_149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_149"
+ op: "Sub"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/read"
+ input: "mul_804"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_643"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ input: "sub_149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_644"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ input: "add_502"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_645"
+ op: "Assign"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ input: "add_503"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_805/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_805"
+ op: "Mul"
+ input: "Mul_805/x"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_806/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_806"
+ op: "Mul"
+ input: "Mul_806/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_505"
+ op: "Add"
+ input: "Mul_805"
+ input: "Mul_806"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_807/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_807"
+ op: "Mul"
+ input: "Mul_807/x"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_149"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_808/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_808"
+ op: "Mul"
+ input: "Mul_808/x"
+ input: "Square_149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_506"
+ op: "Add"
+ input: "Mul_807"
+ input: "Mul_808"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_149"
+ op: "Sqrt"
+ input: "add_506"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_507/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_507"
+ op: "Add"
+ input: "Sqrt_149"
+ input: "add_507/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_150"
+ op: "RealDiv"
+ input: "add_505"
+ input: "add_507"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_809/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_809"
+ op: "Mul"
+ input: "mul_809/x"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_508"
+ op: "Add"
+ input: "truediv_150"
+ input: "mul_809"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_810"
+ op: "Mul"
+ input: "add"
+ input: "add_508"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_150"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/read"
+ input: "mul_810"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_646"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/kernel"
+ input: "sub_150"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_647"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ input: "add_505"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_648"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ input: "add_506"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_811/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_811"
+ op: "Mul"
+ input: "Mul_811/x"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_812/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_812"
+ op: "Mul"
+ input: "Mul_812/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_150"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_509"
+ op: "Add"
+ input: "Mul_811"
+ input: "Mul_812"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_813/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_813"
+ op: "Mul"
+ input: "Mul_813/x"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_150"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_150"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_814/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_814"
+ op: "Mul"
+ input: "Mul_814/x"
+ input: "Square_150"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_510"
+ op: "Add"
+ input: "Mul_813"
+ input: "Mul_814"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_150"
+ op: "Sqrt"
+ input: "add_510"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_511/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_511"
+ op: "Add"
+ input: "Sqrt_150"
+ input: "add_511/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_151"
+ op: "RealDiv"
+ input: "add_509"
+ input: "add_511"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_815"
+ op: "Mul"
+ input: "add"
+ input: "truediv_151"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_151"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/self/query/bias/read"
+ input: "mul_815"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_649"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/bias"
+ input: "sub_151"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_650"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ input: "add_509"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_651"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ input: "add_510"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_816/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_816"
+ op: "Mul"
+ input: "Mul_816/x"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_817/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_817"
+ op: "Mul"
+ input: "Mul_817/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_151"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_512"
+ op: "Add"
+ input: "Mul_816"
+ input: "Mul_817"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_818/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_818"
+ op: "Mul"
+ input: "Mul_818/x"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_151"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_151"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_819/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_819"
+ op: "Mul"
+ input: "Mul_819/x"
+ input: "Square_151"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_513"
+ op: "Add"
+ input: "Mul_818"
+ input: "Mul_819"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_151"
+ op: "Sqrt"
+ input: "add_513"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_514/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_514"
+ op: "Add"
+ input: "Sqrt_151"
+ input: "add_514/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_152"
+ op: "RealDiv"
+ input: "add_512"
+ input: "add_514"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_820/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_820"
+ op: "Mul"
+ input: "mul_820/x"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_515"
+ op: "Add"
+ input: "truediv_152"
+ input: "mul_820"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_821"
+ op: "Mul"
+ input: "add"
+ input: "add_515"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_152"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/read"
+ input: "mul_821"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_652"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/kernel"
+ input: "sub_152"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_653"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ input: "add_512"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_654"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ input: "add_513"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_822/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_822"
+ op: "Mul"
+ input: "Mul_822/x"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_823/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_823"
+ op: "Mul"
+ input: "Mul_823/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_152"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_516"
+ op: "Add"
+ input: "Mul_822"
+ input: "Mul_823"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_824/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_824"
+ op: "Mul"
+ input: "Mul_824/x"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_152"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_152"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_825/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_825"
+ op: "Mul"
+ input: "Mul_825/x"
+ input: "Square_152"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_517"
+ op: "Add"
+ input: "Mul_824"
+ input: "Mul_825"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_152"
+ op: "Sqrt"
+ input: "add_517"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_518/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_518"
+ op: "Add"
+ input: "Sqrt_152"
+ input: "add_518/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_153"
+ op: "RealDiv"
+ input: "add_516"
+ input: "add_518"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_826"
+ op: "Mul"
+ input: "add"
+ input: "truediv_153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_153"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/self/key/bias/read"
+ input: "mul_826"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_655"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/bias"
+ input: "sub_153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_656"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ input: "add_516"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_657"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ input: "add_517"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_827/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_827"
+ op: "Mul"
+ input: "Mul_827/x"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_828/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_828"
+ op: "Mul"
+ input: "Mul_828/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_519"
+ op: "Add"
+ input: "Mul_827"
+ input: "Mul_828"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_829/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_829"
+ op: "Mul"
+ input: "Mul_829/x"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_153"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_830/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_830"
+ op: "Mul"
+ input: "Mul_830/x"
+ input: "Square_153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_520"
+ op: "Add"
+ input: "Mul_829"
+ input: "Mul_830"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_153"
+ op: "Sqrt"
+ input: "add_520"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_521/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_521"
+ op: "Add"
+ input: "Sqrt_153"
+ input: "add_521/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_154"
+ op: "RealDiv"
+ input: "add_519"
+ input: "add_521"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_831/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_831"
+ op: "Mul"
+ input: "mul_831/x"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_522"
+ op: "Add"
+ input: "truediv_154"
+ input: "mul_831"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_832"
+ op: "Mul"
+ input: "add"
+ input: "add_522"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_154"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/read"
+ input: "mul_832"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_658"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/kernel"
+ input: "sub_154"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_659"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ input: "add_519"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_660"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ input: "add_520"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_833/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_833"
+ op: "Mul"
+ input: "Mul_833/x"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_834/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_834"
+ op: "Mul"
+ input: "Mul_834/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_154"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_523"
+ op: "Add"
+ input: "Mul_833"
+ input: "Mul_834"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_835/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_835"
+ op: "Mul"
+ input: "Mul_835/x"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_154"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_154"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_836/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_836"
+ op: "Mul"
+ input: "Mul_836/x"
+ input: "Square_154"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_524"
+ op: "Add"
+ input: "Mul_835"
+ input: "Mul_836"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_154"
+ op: "Sqrt"
+ input: "add_524"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_525/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_525"
+ op: "Add"
+ input: "Sqrt_154"
+ input: "add_525/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_155"
+ op: "RealDiv"
+ input: "add_523"
+ input: "add_525"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_837"
+ op: "Mul"
+ input: "add"
+ input: "truediv_155"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_155"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/self/value/bias/read"
+ input: "mul_837"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_661"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/bias"
+ input: "sub_155"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_662"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ input: "add_523"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_663"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ input: "add_524"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_838/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_838"
+ op: "Mul"
+ input: "Mul_838/x"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_839/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_839"
+ op: "Mul"
+ input: "Mul_839/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_155"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_526"
+ op: "Add"
+ input: "Mul_838"
+ input: "Mul_839"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_840/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_840"
+ op: "Mul"
+ input: "Mul_840/x"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_155"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_155"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_841/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_841"
+ op: "Mul"
+ input: "Mul_841/x"
+ input: "Square_155"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_527"
+ op: "Add"
+ input: "Mul_840"
+ input: "Mul_841"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_155"
+ op: "Sqrt"
+ input: "add_527"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_528/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_528"
+ op: "Add"
+ input: "Sqrt_155"
+ input: "add_528/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_156"
+ op: "RealDiv"
+ input: "add_526"
+ input: "add_528"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_842/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_842"
+ op: "Mul"
+ input: "mul_842/x"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_529"
+ op: "Add"
+ input: "truediv_156"
+ input: "mul_842"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_843"
+ op: "Mul"
+ input: "add"
+ input: "add_529"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_156"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/read"
+ input: "mul_843"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_664"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel"
+ input: "sub_156"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_665"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ input: "add_526"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_666"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ input: "add_527"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_844/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_844"
+ op: "Mul"
+ input: "Mul_844/x"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_845/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_845"
+ op: "Mul"
+ input: "Mul_845/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_156"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_530"
+ op: "Add"
+ input: "Mul_844"
+ input: "Mul_845"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_846/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_846"
+ op: "Mul"
+ input: "Mul_846/x"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_156"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_156"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_847/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_847"
+ op: "Mul"
+ input: "Mul_847/x"
+ input: "Square_156"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_531"
+ op: "Add"
+ input: "Mul_846"
+ input: "Mul_847"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_156"
+ op: "Sqrt"
+ input: "add_531"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_532/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_532"
+ op: "Add"
+ input: "Sqrt_156"
+ input: "add_532/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_157"
+ op: "RealDiv"
+ input: "add_530"
+ input: "add_532"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_848"
+ op: "Mul"
+ input: "add"
+ input: "truediv_157"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_157"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/read"
+ input: "mul_848"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_667"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/bias"
+ input: "sub_157"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_668"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ input: "add_530"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_669"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ input: "add_531"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_849/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_849"
+ op: "Mul"
+ input: "Mul_849/x"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_850/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_850"
+ op: "Mul"
+ input: "Mul_850/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_157"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_533"
+ op: "Add"
+ input: "Mul_849"
+ input: "Mul_850"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_851/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_851"
+ op: "Mul"
+ input: "Mul_851/x"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_157"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_157"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_852/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_852"
+ op: "Mul"
+ input: "Mul_852/x"
+ input: "Square_157"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_534"
+ op: "Add"
+ input: "Mul_851"
+ input: "Mul_852"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_157"
+ op: "Sqrt"
+ input: "add_534"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_535/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_535"
+ op: "Add"
+ input: "Sqrt_157"
+ input: "add_535/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_158"
+ op: "RealDiv"
+ input: "add_533"
+ input: "add_535"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_853"
+ op: "Mul"
+ input: "add"
+ input: "truediv_158"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_158"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/read"
+ input: "mul_853"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_670"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ input: "sub_158"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_671"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ input: "add_533"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_672"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ input: "add_534"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_854/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_854"
+ op: "Mul"
+ input: "Mul_854/x"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_855/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_855"
+ op: "Mul"
+ input: "Mul_855/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_158"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_536"
+ op: "Add"
+ input: "Mul_854"
+ input: "Mul_855"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_856/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_856"
+ op: "Mul"
+ input: "Mul_856/x"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_158"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_158"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_857/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_857"
+ op: "Mul"
+ input: "Mul_857/x"
+ input: "Square_158"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_537"
+ op: "Add"
+ input: "Mul_856"
+ input: "Mul_857"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_158"
+ op: "Sqrt"
+ input: "add_537"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_538/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_538"
+ op: "Add"
+ input: "Sqrt_158"
+ input: "add_538/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_159"
+ op: "RealDiv"
+ input: "add_536"
+ input: "add_538"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_858"
+ op: "Mul"
+ input: "add"
+ input: "truediv_159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_159"
+ op: "Sub"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read"
+ input: "mul_858"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_673"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ input: "sub_159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_674"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_536"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_675"
+ op: "Assign"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_537"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_859/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_859"
+ op: "Mul"
+ input: "Mul_859/x"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_860/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_860"
+ op: "Mul"
+ input: "Mul_860/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_539"
+ op: "Add"
+ input: "Mul_859"
+ input: "Mul_860"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_861/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_861"
+ op: "Mul"
+ input: "Mul_861/x"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_159"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_862/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_862"
+ op: "Mul"
+ input: "Mul_862/x"
+ input: "Square_159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_540"
+ op: "Add"
+ input: "Mul_861"
+ input: "Mul_862"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_159"
+ op: "Sqrt"
+ input: "add_540"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_541/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_541"
+ op: "Add"
+ input: "Sqrt_159"
+ input: "add_541/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_160"
+ op: "RealDiv"
+ input: "add_539"
+ input: "add_541"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_863/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_863"
+ op: "Mul"
+ input: "mul_863/x"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_542"
+ op: "Add"
+ input: "truediv_160"
+ input: "mul_863"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_864"
+ op: "Mul"
+ input: "add"
+ input: "add_542"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_160"
+ op: "Sub"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/read"
+ input: "mul_864"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_676"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel"
+ input: "sub_160"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_677"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ input: "add_539"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_678"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ input: "add_540"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_865/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_865"
+ op: "Mul"
+ input: "Mul_865/x"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_866/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_866"
+ op: "Mul"
+ input: "Mul_866/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_160"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_543"
+ op: "Add"
+ input: "Mul_865"
+ input: "Mul_866"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_867/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_867"
+ op: "Mul"
+ input: "Mul_867/x"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_160"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_160"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_868/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_868"
+ op: "Mul"
+ input: "Mul_868/x"
+ input: "Square_160"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_544"
+ op: "Add"
+ input: "Mul_867"
+ input: "Mul_868"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_160"
+ op: "Sqrt"
+ input: "add_544"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_545/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_545"
+ op: "Add"
+ input: "Sqrt_160"
+ input: "add_545/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_161"
+ op: "RealDiv"
+ input: "add_543"
+ input: "add_545"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_869"
+ op: "Mul"
+ input: "add"
+ input: "truediv_161"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_161"
+ op: "Sub"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/read"
+ input: "mul_869"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_679"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/bias"
+ input: "sub_161"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_680"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ input: "add_543"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_681"
+ op: "Assign"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ input: "add_544"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_870/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_870"
+ op: "Mul"
+ input: "Mul_870/x"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_871/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_871"
+ op: "Mul"
+ input: "Mul_871/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_161"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_546"
+ op: "Add"
+ input: "Mul_870"
+ input: "Mul_871"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_872/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_872"
+ op: "Mul"
+ input: "Mul_872/x"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_161"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_161"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_873/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_873"
+ op: "Mul"
+ input: "Mul_873/x"
+ input: "Square_161"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_547"
+ op: "Add"
+ input: "Mul_872"
+ input: "Mul_873"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_161"
+ op: "Sqrt"
+ input: "add_547"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_548/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_548"
+ op: "Add"
+ input: "Sqrt_161"
+ input: "add_548/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_162"
+ op: "RealDiv"
+ input: "add_546"
+ input: "add_548"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_874/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_874"
+ op: "Mul"
+ input: "mul_874/x"
+ input: "bert/encoder/layer_9/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_549"
+ op: "Add"
+ input: "truediv_162"
+ input: "mul_874"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_875"
+ op: "Mul"
+ input: "add"
+ input: "add_549"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_162"
+ op: "Sub"
+ input: "bert/encoder/layer_9/output/dense/kernel/read"
+ input: "mul_875"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_682"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/kernel"
+ input: "sub_162"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_683"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ input: "add_546"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_684"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ input: "add_547"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_876/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_876"
+ op: "Mul"
+ input: "Mul_876/x"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_877/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_877"
+ op: "Mul"
+ input: "Mul_877/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_162"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_550"
+ op: "Add"
+ input: "Mul_876"
+ input: "Mul_877"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_878/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_878"
+ op: "Mul"
+ input: "Mul_878/x"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_162"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_162"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_879/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_879"
+ op: "Mul"
+ input: "Mul_879/x"
+ input: "Square_162"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_551"
+ op: "Add"
+ input: "Mul_878"
+ input: "Mul_879"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_162"
+ op: "Sqrt"
+ input: "add_551"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_552/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_552"
+ op: "Add"
+ input: "Sqrt_162"
+ input: "add_552/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_163"
+ op: "RealDiv"
+ input: "add_550"
+ input: "add_552"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_880"
+ op: "Mul"
+ input: "add"
+ input: "truediv_163"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_163"
+ op: "Sub"
+ input: "bert/encoder/layer_9/output/dense/bias/read"
+ input: "mul_880"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_685"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/bias"
+ input: "sub_163"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_686"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ input: "add_550"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_687"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ input: "add_551"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_881/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_881"
+ op: "Mul"
+ input: "Mul_881/x"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_882/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_882"
+ op: "Mul"
+ input: "Mul_882/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_163"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_553"
+ op: "Add"
+ input: "Mul_881"
+ input: "Mul_882"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_883/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_883"
+ op: "Mul"
+ input: "Mul_883/x"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_163"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_163"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_884/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_884"
+ op: "Mul"
+ input: "Mul_884/x"
+ input: "Square_163"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_554"
+ op: "Add"
+ input: "Mul_883"
+ input: "Mul_884"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_163"
+ op: "Sqrt"
+ input: "add_554"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_555/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_555"
+ op: "Add"
+ input: "Sqrt_163"
+ input: "add_555/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_164"
+ op: "RealDiv"
+ input: "add_553"
+ input: "add_555"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_885"
+ op: "Mul"
+ input: "add"
+ input: "truediv_164"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_164"
+ op: "Sub"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/read"
+ input: "mul_885"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_688"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta"
+ input: "sub_164"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_689"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ input: "add_553"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_690"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ input: "add_554"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_886/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_886"
+ op: "Mul"
+ input: "Mul_886/x"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_887/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_887"
+ op: "Mul"
+ input: "Mul_887/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_164"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_556"
+ op: "Add"
+ input: "Mul_886"
+ input: "Mul_887"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_888/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_888"
+ op: "Mul"
+ input: "Mul_888/x"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_164"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_164"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_889/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_889"
+ op: "Mul"
+ input: "Mul_889/x"
+ input: "Square_164"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_557"
+ op: "Add"
+ input: "Mul_888"
+ input: "Mul_889"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_164"
+ op: "Sqrt"
+ input: "add_557"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_558/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_558"
+ op: "Add"
+ input: "Sqrt_164"
+ input: "add_558/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_165"
+ op: "RealDiv"
+ input: "add_556"
+ input: "add_558"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_890"
+ op: "Mul"
+ input: "add"
+ input: "truediv_165"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_165"
+ op: "Sub"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/read"
+ input: "mul_890"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_691"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ input: "sub_165"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_692"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ input: "add_556"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_693"
+ op: "Assign"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ input: "add_557"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_891/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_891"
+ op: "Mul"
+ input: "Mul_891/x"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_892/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_892"
+ op: "Mul"
+ input: "Mul_892/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_165"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_559"
+ op: "Add"
+ input: "Mul_891"
+ input: "Mul_892"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_893/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_893"
+ op: "Mul"
+ input: "Mul_893/x"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_165"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_165"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_894/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_894"
+ op: "Mul"
+ input: "Mul_894/x"
+ input: "Square_165"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_560"
+ op: "Add"
+ input: "Mul_893"
+ input: "Mul_894"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_165"
+ op: "Sqrt"
+ input: "add_560"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_561/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_561"
+ op: "Add"
+ input: "Sqrt_165"
+ input: "add_561/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_166"
+ op: "RealDiv"
+ input: "add_559"
+ input: "add_561"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_895/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_895"
+ op: "Mul"
+ input: "mul_895/x"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_562"
+ op: "Add"
+ input: "truediv_166"
+ input: "mul_895"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_896"
+ op: "Mul"
+ input: "add"
+ input: "add_562"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_166"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/read"
+ input: "mul_896"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_694"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/kernel"
+ input: "sub_166"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_695"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ input: "add_559"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_696"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ input: "add_560"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_897/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_897"
+ op: "Mul"
+ input: "Mul_897/x"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_898/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_898"
+ op: "Mul"
+ input: "Mul_898/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_166"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_563"
+ op: "Add"
+ input: "Mul_897"
+ input: "Mul_898"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_899/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_899"
+ op: "Mul"
+ input: "Mul_899/x"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_166"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_166"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_900/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_900"
+ op: "Mul"
+ input: "Mul_900/x"
+ input: "Square_166"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_564"
+ op: "Add"
+ input: "Mul_899"
+ input: "Mul_900"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_166"
+ op: "Sqrt"
+ input: "add_564"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_565/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_565"
+ op: "Add"
+ input: "Sqrt_166"
+ input: "add_565/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_167"
+ op: "RealDiv"
+ input: "add_563"
+ input: "add_565"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_901"
+ op: "Mul"
+ input: "add"
+ input: "truediv_167"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_167"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/self/query/bias/read"
+ input: "mul_901"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_697"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/bias"
+ input: "sub_167"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_698"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ input: "add_563"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_699"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ input: "add_564"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_902/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_902"
+ op: "Mul"
+ input: "Mul_902/x"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_903/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_903"
+ op: "Mul"
+ input: "Mul_903/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_167"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_566"
+ op: "Add"
+ input: "Mul_902"
+ input: "Mul_903"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_904/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_904"
+ op: "Mul"
+ input: "Mul_904/x"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_167"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_167"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_905/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_905"
+ op: "Mul"
+ input: "Mul_905/x"
+ input: "Square_167"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_567"
+ op: "Add"
+ input: "Mul_904"
+ input: "Mul_905"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_167"
+ op: "Sqrt"
+ input: "add_567"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_568/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_568"
+ op: "Add"
+ input: "Sqrt_167"
+ input: "add_568/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_168"
+ op: "RealDiv"
+ input: "add_566"
+ input: "add_568"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_906/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_906"
+ op: "Mul"
+ input: "mul_906/x"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_569"
+ op: "Add"
+ input: "truediv_168"
+ input: "mul_906"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_907"
+ op: "Mul"
+ input: "add"
+ input: "add_569"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_168"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/read"
+ input: "mul_907"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_700"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/kernel"
+ input: "sub_168"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_701"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ input: "add_566"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_702"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ input: "add_567"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_908/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_908"
+ op: "Mul"
+ input: "Mul_908/x"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_909/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_909"
+ op: "Mul"
+ input: "Mul_909/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_168"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_570"
+ op: "Add"
+ input: "Mul_908"
+ input: "Mul_909"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_910/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_910"
+ op: "Mul"
+ input: "Mul_910/x"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_168"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_168"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_911/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_911"
+ op: "Mul"
+ input: "Mul_911/x"
+ input: "Square_168"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_571"
+ op: "Add"
+ input: "Mul_910"
+ input: "Mul_911"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_168"
+ op: "Sqrt"
+ input: "add_571"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_572/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_572"
+ op: "Add"
+ input: "Sqrt_168"
+ input: "add_572/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_169"
+ op: "RealDiv"
+ input: "add_570"
+ input: "add_572"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_912"
+ op: "Mul"
+ input: "add"
+ input: "truediv_169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_169"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/self/key/bias/read"
+ input: "mul_912"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_703"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/bias"
+ input: "sub_169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_704"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ input: "add_570"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_705"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ input: "add_571"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_913/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_913"
+ op: "Mul"
+ input: "Mul_913/x"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_914/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_914"
+ op: "Mul"
+ input: "Mul_914/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_573"
+ op: "Add"
+ input: "Mul_913"
+ input: "Mul_914"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_915/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_915"
+ op: "Mul"
+ input: "Mul_915/x"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_169"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_916/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_916"
+ op: "Mul"
+ input: "Mul_916/x"
+ input: "Square_169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_574"
+ op: "Add"
+ input: "Mul_915"
+ input: "Mul_916"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_169"
+ op: "Sqrt"
+ input: "add_574"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_575/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_575"
+ op: "Add"
+ input: "Sqrt_169"
+ input: "add_575/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_170"
+ op: "RealDiv"
+ input: "add_573"
+ input: "add_575"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_917/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_917"
+ op: "Mul"
+ input: "mul_917/x"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_576"
+ op: "Add"
+ input: "truediv_170"
+ input: "mul_917"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_918"
+ op: "Mul"
+ input: "add"
+ input: "add_576"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_170"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/read"
+ input: "mul_918"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_706"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/kernel"
+ input: "sub_170"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_707"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ input: "add_573"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_708"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ input: "add_574"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_919/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_919"
+ op: "Mul"
+ input: "Mul_919/x"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_920/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_920"
+ op: "Mul"
+ input: "Mul_920/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_170"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_577"
+ op: "Add"
+ input: "Mul_919"
+ input: "Mul_920"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_921/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_921"
+ op: "Mul"
+ input: "Mul_921/x"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_170"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_170"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_922/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_922"
+ op: "Mul"
+ input: "Mul_922/x"
+ input: "Square_170"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_578"
+ op: "Add"
+ input: "Mul_921"
+ input: "Mul_922"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_170"
+ op: "Sqrt"
+ input: "add_578"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_579/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_579"
+ op: "Add"
+ input: "Sqrt_170"
+ input: "add_579/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_171"
+ op: "RealDiv"
+ input: "add_577"
+ input: "add_579"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_923"
+ op: "Mul"
+ input: "add"
+ input: "truediv_171"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_171"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/self/value/bias/read"
+ input: "mul_923"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_709"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/bias"
+ input: "sub_171"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_710"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ input: "add_577"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_711"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ input: "add_578"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_924/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_924"
+ op: "Mul"
+ input: "Mul_924/x"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_925/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_925"
+ op: "Mul"
+ input: "Mul_925/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_171"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_580"
+ op: "Add"
+ input: "Mul_924"
+ input: "Mul_925"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_926/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_926"
+ op: "Mul"
+ input: "Mul_926/x"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_171"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_171"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_927/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_927"
+ op: "Mul"
+ input: "Mul_927/x"
+ input: "Square_171"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_581"
+ op: "Add"
+ input: "Mul_926"
+ input: "Mul_927"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_171"
+ op: "Sqrt"
+ input: "add_581"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_582/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_582"
+ op: "Add"
+ input: "Sqrt_171"
+ input: "add_582/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_172"
+ op: "RealDiv"
+ input: "add_580"
+ input: "add_582"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_928/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_928"
+ op: "Mul"
+ input: "mul_928/x"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_583"
+ op: "Add"
+ input: "truediv_172"
+ input: "mul_928"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_929"
+ op: "Mul"
+ input: "add"
+ input: "add_583"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_172"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/read"
+ input: "mul_929"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_712"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel"
+ input: "sub_172"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_713"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ input: "add_580"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_714"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ input: "add_581"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_930/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_930"
+ op: "Mul"
+ input: "Mul_930/x"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_931/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_931"
+ op: "Mul"
+ input: "Mul_931/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_172"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_584"
+ op: "Add"
+ input: "Mul_930"
+ input: "Mul_931"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_932/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_932"
+ op: "Mul"
+ input: "Mul_932/x"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_172"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_172"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_933/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_933"
+ op: "Mul"
+ input: "Mul_933/x"
+ input: "Square_172"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_585"
+ op: "Add"
+ input: "Mul_932"
+ input: "Mul_933"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_172"
+ op: "Sqrt"
+ input: "add_585"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_586/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_586"
+ op: "Add"
+ input: "Sqrt_172"
+ input: "add_586/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_173"
+ op: "RealDiv"
+ input: "add_584"
+ input: "add_586"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_934"
+ op: "Mul"
+ input: "add"
+ input: "truediv_173"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_173"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/read"
+ input: "mul_934"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_715"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/bias"
+ input: "sub_173"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_716"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ input: "add_584"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_717"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ input: "add_585"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_935/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_935"
+ op: "Mul"
+ input: "Mul_935/x"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_936/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_936"
+ op: "Mul"
+ input: "Mul_936/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_173"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_587"
+ op: "Add"
+ input: "Mul_935"
+ input: "Mul_936"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_937/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_937"
+ op: "Mul"
+ input: "Mul_937/x"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_173"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_173"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_938/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_938"
+ op: "Mul"
+ input: "Mul_938/x"
+ input: "Square_173"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_588"
+ op: "Add"
+ input: "Mul_937"
+ input: "Mul_938"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_173"
+ op: "Sqrt"
+ input: "add_588"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_589/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_589"
+ op: "Add"
+ input: "Sqrt_173"
+ input: "add_589/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_174"
+ op: "RealDiv"
+ input: "add_587"
+ input: "add_589"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_939"
+ op: "Mul"
+ input: "add"
+ input: "truediv_174"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_174"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/read"
+ input: "mul_939"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_718"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ input: "sub_174"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_719"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ input: "add_587"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_720"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ input: "add_588"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_940/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_940"
+ op: "Mul"
+ input: "Mul_940/x"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_941/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_941"
+ op: "Mul"
+ input: "Mul_941/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_174"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_590"
+ op: "Add"
+ input: "Mul_940"
+ input: "Mul_941"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_942/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_942"
+ op: "Mul"
+ input: "Mul_942/x"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_174"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_174"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_943/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_943"
+ op: "Mul"
+ input: "Mul_943/x"
+ input: "Square_174"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_591"
+ op: "Add"
+ input: "Mul_942"
+ input: "Mul_943"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_174"
+ op: "Sqrt"
+ input: "add_591"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_592/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_592"
+ op: "Add"
+ input: "Sqrt_174"
+ input: "add_592/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_175"
+ op: "RealDiv"
+ input: "add_590"
+ input: "add_592"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_944"
+ op: "Mul"
+ input: "add"
+ input: "truediv_175"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_175"
+ op: "Sub"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read"
+ input: "mul_944"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_721"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ input: "sub_175"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_722"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_590"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_723"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_591"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_945/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_945"
+ op: "Mul"
+ input: "Mul_945/x"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_946/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_946"
+ op: "Mul"
+ input: "Mul_946/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_175"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_593"
+ op: "Add"
+ input: "Mul_945"
+ input: "Mul_946"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_947/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_947"
+ op: "Mul"
+ input: "Mul_947/x"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_175"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_175"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_948/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_948"
+ op: "Mul"
+ input: "Mul_948/x"
+ input: "Square_175"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_594"
+ op: "Add"
+ input: "Mul_947"
+ input: "Mul_948"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_175"
+ op: "Sqrt"
+ input: "add_594"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_595/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_595"
+ op: "Add"
+ input: "Sqrt_175"
+ input: "add_595/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_176"
+ op: "RealDiv"
+ input: "add_593"
+ input: "add_595"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_949/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_949"
+ op: "Mul"
+ input: "mul_949/x"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_596"
+ op: "Add"
+ input: "truediv_176"
+ input: "mul_949"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_950"
+ op: "Mul"
+ input: "add"
+ input: "add_596"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_176"
+ op: "Sub"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/read"
+ input: "mul_950"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_724"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel"
+ input: "sub_176"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_725"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ input: "add_593"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_726"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ input: "add_594"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_951/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_951"
+ op: "Mul"
+ input: "Mul_951/x"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_952/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_952"
+ op: "Mul"
+ input: "Mul_952/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_176"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_597"
+ op: "Add"
+ input: "Mul_951"
+ input: "Mul_952"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_953/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_953"
+ op: "Mul"
+ input: "Mul_953/x"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_176"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_176"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_954/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_954"
+ op: "Mul"
+ input: "Mul_954/x"
+ input: "Square_176"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_598"
+ op: "Add"
+ input: "Mul_953"
+ input: "Mul_954"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_176"
+ op: "Sqrt"
+ input: "add_598"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_599/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_599"
+ op: "Add"
+ input: "Sqrt_176"
+ input: "add_599/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_177"
+ op: "RealDiv"
+ input: "add_597"
+ input: "add_599"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_955"
+ op: "Mul"
+ input: "add"
+ input: "truediv_177"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_177"
+ op: "Sub"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/read"
+ input: "mul_955"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_727"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/bias"
+ input: "sub_177"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_728"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ input: "add_597"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_729"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ input: "add_598"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_956/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_956"
+ op: "Mul"
+ input: "Mul_956/x"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_957/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_957"
+ op: "Mul"
+ input: "Mul_957/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_177"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_600"
+ op: "Add"
+ input: "Mul_956"
+ input: "Mul_957"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_958/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_958"
+ op: "Mul"
+ input: "Mul_958/x"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_177"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_177"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_959/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_959"
+ op: "Mul"
+ input: "Mul_959/x"
+ input: "Square_177"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_601"
+ op: "Add"
+ input: "Mul_958"
+ input: "Mul_959"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_177"
+ op: "Sqrt"
+ input: "add_601"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_602/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_602"
+ op: "Add"
+ input: "Sqrt_177"
+ input: "add_602/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_178"
+ op: "RealDiv"
+ input: "add_600"
+ input: "add_602"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_960/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_960"
+ op: "Mul"
+ input: "mul_960/x"
+ input: "bert/encoder/layer_10/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_603"
+ op: "Add"
+ input: "truediv_178"
+ input: "mul_960"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_961"
+ op: "Mul"
+ input: "add"
+ input: "add_603"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_178"
+ op: "Sub"
+ input: "bert/encoder/layer_10/output/dense/kernel/read"
+ input: "mul_961"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_730"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/kernel"
+ input: "sub_178"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_731"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ input: "add_600"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_732"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ input: "add_601"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_962/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_962"
+ op: "Mul"
+ input: "Mul_962/x"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_963/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_963"
+ op: "Mul"
+ input: "Mul_963/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_178"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_604"
+ op: "Add"
+ input: "Mul_962"
+ input: "Mul_963"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_964/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_964"
+ op: "Mul"
+ input: "Mul_964/x"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_178"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_178"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_965/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_965"
+ op: "Mul"
+ input: "Mul_965/x"
+ input: "Square_178"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_605"
+ op: "Add"
+ input: "Mul_964"
+ input: "Mul_965"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_178"
+ op: "Sqrt"
+ input: "add_605"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_606/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_606"
+ op: "Add"
+ input: "Sqrt_178"
+ input: "add_606/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_179"
+ op: "RealDiv"
+ input: "add_604"
+ input: "add_606"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_966"
+ op: "Mul"
+ input: "add"
+ input: "truediv_179"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_179"
+ op: "Sub"
+ input: "bert/encoder/layer_10/output/dense/bias/read"
+ input: "mul_966"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_733"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/bias"
+ input: "sub_179"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_734"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ input: "add_604"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_735"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ input: "add_605"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_967/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_967"
+ op: "Mul"
+ input: "Mul_967/x"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_968/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_968"
+ op: "Mul"
+ input: "Mul_968/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_179"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_607"
+ op: "Add"
+ input: "Mul_967"
+ input: "Mul_968"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_969/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_969"
+ op: "Mul"
+ input: "Mul_969/x"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_179"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_179"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_970/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_970"
+ op: "Mul"
+ input: "Mul_970/x"
+ input: "Square_179"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_608"
+ op: "Add"
+ input: "Mul_969"
+ input: "Mul_970"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_179"
+ op: "Sqrt"
+ input: "add_608"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_609/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_609"
+ op: "Add"
+ input: "Sqrt_179"
+ input: "add_609/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_180"
+ op: "RealDiv"
+ input: "add_607"
+ input: "add_609"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_971"
+ op: "Mul"
+ input: "add"
+ input: "truediv_180"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_180"
+ op: "Sub"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/read"
+ input: "mul_971"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_736"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta"
+ input: "sub_180"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_737"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ input: "add_607"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_738"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ input: "add_608"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_972/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_972"
+ op: "Mul"
+ input: "Mul_972/x"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_973/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_973"
+ op: "Mul"
+ input: "Mul_973/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_180"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_610"
+ op: "Add"
+ input: "Mul_972"
+ input: "Mul_973"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_974/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_974"
+ op: "Mul"
+ input: "Mul_974/x"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_180"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_180"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_975/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_975"
+ op: "Mul"
+ input: "Mul_975/x"
+ input: "Square_180"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_611"
+ op: "Add"
+ input: "Mul_974"
+ input: "Mul_975"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_180"
+ op: "Sqrt"
+ input: "add_611"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_612/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_612"
+ op: "Add"
+ input: "Sqrt_180"
+ input: "add_612/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_181"
+ op: "RealDiv"
+ input: "add_610"
+ input: "add_612"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_976"
+ op: "Mul"
+ input: "add"
+ input: "truediv_181"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_181"
+ op: "Sub"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/read"
+ input: "mul_976"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_739"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ input: "sub_181"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_740"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ input: "add_610"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_741"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ input: "add_611"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_977/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_977"
+ op: "Mul"
+ input: "Mul_977/x"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_978/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_978"
+ op: "Mul"
+ input: "Mul_978/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_181"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_613"
+ op: "Add"
+ input: "Mul_977"
+ input: "Mul_978"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_979/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_979"
+ op: "Mul"
+ input: "Mul_979/x"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_181"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_181"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_980/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_980"
+ op: "Mul"
+ input: "Mul_980/x"
+ input: "Square_181"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_614"
+ op: "Add"
+ input: "Mul_979"
+ input: "Mul_980"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_181"
+ op: "Sqrt"
+ input: "add_614"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_615/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_615"
+ op: "Add"
+ input: "Sqrt_181"
+ input: "add_615/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_182"
+ op: "RealDiv"
+ input: "add_613"
+ input: "add_615"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_981/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_981"
+ op: "Mul"
+ input: "mul_981/x"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_616"
+ op: "Add"
+ input: "truediv_182"
+ input: "mul_981"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_982"
+ op: "Mul"
+ input: "add"
+ input: "add_616"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_182"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/read"
+ input: "mul_982"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_742"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/kernel"
+ input: "sub_182"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_743"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ input: "add_613"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_744"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ input: "add_614"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/query/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_983/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_983"
+ op: "Mul"
+ input: "Mul_983/x"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_984/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_984"
+ op: "Mul"
+ input: "Mul_984/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_182"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_617"
+ op: "Add"
+ input: "Mul_983"
+ input: "Mul_984"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_985/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_985"
+ op: "Mul"
+ input: "Mul_985/x"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_182"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_182"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_986/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_986"
+ op: "Mul"
+ input: "Mul_986/x"
+ input: "Square_182"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_618"
+ op: "Add"
+ input: "Mul_985"
+ input: "Mul_986"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_182"
+ op: "Sqrt"
+ input: "add_618"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_619/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_619"
+ op: "Add"
+ input: "Sqrt_182"
+ input: "add_619/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_183"
+ op: "RealDiv"
+ input: "add_617"
+ input: "add_619"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_987"
+ op: "Mul"
+ input: "add"
+ input: "truediv_183"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_183"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/self/query/bias/read"
+ input: "mul_987"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_745"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/bias"
+ input: "sub_183"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_746"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ input: "add_617"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_747"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ input: "add_618"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_988/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_988"
+ op: "Mul"
+ input: "Mul_988/x"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_989/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_989"
+ op: "Mul"
+ input: "Mul_989/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_183"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_620"
+ op: "Add"
+ input: "Mul_988"
+ input: "Mul_989"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_990/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_990"
+ op: "Mul"
+ input: "Mul_990/x"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_183"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_183"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_991/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_991"
+ op: "Mul"
+ input: "Mul_991/x"
+ input: "Square_183"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_621"
+ op: "Add"
+ input: "Mul_990"
+ input: "Mul_991"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_183"
+ op: "Sqrt"
+ input: "add_621"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_622/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_622"
+ op: "Add"
+ input: "Sqrt_183"
+ input: "add_622/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_184"
+ op: "RealDiv"
+ input: "add_620"
+ input: "add_622"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_992/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_992"
+ op: "Mul"
+ input: "mul_992/x"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_623"
+ op: "Add"
+ input: "truediv_184"
+ input: "mul_992"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_993"
+ op: "Mul"
+ input: "add"
+ input: "add_623"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_184"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/read"
+ input: "mul_993"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_748"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/kernel"
+ input: "sub_184"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_749"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ input: "add_620"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_750"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ input: "add_621"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/key/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_994/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_994"
+ op: "Mul"
+ input: "Mul_994/x"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_995/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_995"
+ op: "Mul"
+ input: "Mul_995/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_184"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_624"
+ op: "Add"
+ input: "Mul_994"
+ input: "Mul_995"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_996/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_996"
+ op: "Mul"
+ input: "Mul_996/x"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_184"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_184"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_997/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_997"
+ op: "Mul"
+ input: "Mul_997/x"
+ input: "Square_184"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_625"
+ op: "Add"
+ input: "Mul_996"
+ input: "Mul_997"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_184"
+ op: "Sqrt"
+ input: "add_625"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_626/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_626"
+ op: "Add"
+ input: "Sqrt_184"
+ input: "add_626/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_185"
+ op: "RealDiv"
+ input: "add_624"
+ input: "add_626"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_998"
+ op: "Mul"
+ input: "add"
+ input: "truediv_185"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_185"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/self/key/bias/read"
+ input: "mul_998"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_751"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/bias"
+ input: "sub_185"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_752"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ input: "add_624"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_753"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ input: "add_625"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_999/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_999"
+ op: "Mul"
+ input: "Mul_999/x"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1000/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1000"
+ op: "Mul"
+ input: "Mul_1000/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_185"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_627"
+ op: "Add"
+ input: "Mul_999"
+ input: "Mul_1000"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1001/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1001"
+ op: "Mul"
+ input: "Mul_1001/x"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_185"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_185"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1002/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1002"
+ op: "Mul"
+ input: "Mul_1002/x"
+ input: "Square_185"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_628"
+ op: "Add"
+ input: "Mul_1001"
+ input: "Mul_1002"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_185"
+ op: "Sqrt"
+ input: "add_628"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_629/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_629"
+ op: "Add"
+ input: "Sqrt_185"
+ input: "add_629/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_186"
+ op: "RealDiv"
+ input: "add_627"
+ input: "add_629"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1003/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_1003"
+ op: "Mul"
+ input: "mul_1003/x"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_630"
+ op: "Add"
+ input: "truediv_186"
+ input: "mul_1003"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1004"
+ op: "Mul"
+ input: "add"
+ input: "add_630"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_186"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/read"
+ input: "mul_1004"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_754"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/kernel"
+ input: "sub_186"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_755"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ input: "add_627"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_756"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ input: "add_628"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/self/value/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1005/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1005"
+ op: "Mul"
+ input: "Mul_1005/x"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1006/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1006"
+ op: "Mul"
+ input: "Mul_1006/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_186"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_631"
+ op: "Add"
+ input: "Mul_1005"
+ input: "Mul_1006"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1007/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1007"
+ op: "Mul"
+ input: "Mul_1007/x"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_186"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_186"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1008/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1008"
+ op: "Mul"
+ input: "Mul_1008/x"
+ input: "Square_186"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_632"
+ op: "Add"
+ input: "Mul_1007"
+ input: "Mul_1008"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_186"
+ op: "Sqrt"
+ input: "add_632"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_633/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_633"
+ op: "Add"
+ input: "Sqrt_186"
+ input: "add_633/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_187"
+ op: "RealDiv"
+ input: "add_631"
+ input: "add_633"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1009"
+ op: "Mul"
+ input: "add"
+ input: "truediv_187"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_187"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/self/value/bias/read"
+ input: "mul_1009"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_757"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/bias"
+ input: "sub_187"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_758"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ input: "add_631"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_759"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ input: "add_632"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1010/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1010"
+ op: "Mul"
+ input: "Mul_1010/x"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1011/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1011"
+ op: "Mul"
+ input: "Mul_1011/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_187"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_634"
+ op: "Add"
+ input: "Mul_1010"
+ input: "Mul_1011"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1012/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1012"
+ op: "Mul"
+ input: "Mul_1012/x"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_187"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_187"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1013/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1013"
+ op: "Mul"
+ input: "Mul_1013/x"
+ input: "Square_187"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_635"
+ op: "Add"
+ input: "Mul_1012"
+ input: "Mul_1013"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_187"
+ op: "Sqrt"
+ input: "add_635"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_636/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_636"
+ op: "Add"
+ input: "Sqrt_187"
+ input: "add_636/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_188"
+ op: "RealDiv"
+ input: "add_634"
+ input: "add_636"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1014/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_1014"
+ op: "Mul"
+ input: "mul_1014/x"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_637"
+ op: "Add"
+ input: "truediv_188"
+ input: "mul_1014"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1015"
+ op: "Mul"
+ input: "add"
+ input: "add_637"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_188"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/read"
+ input: "mul_1015"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_760"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel"
+ input: "sub_188"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_761"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ input: "add_634"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_762"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ input: "add_635"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1016/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1016"
+ op: "Mul"
+ input: "Mul_1016/x"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1017/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1017"
+ op: "Mul"
+ input: "Mul_1017/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_188"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_638"
+ op: "Add"
+ input: "Mul_1016"
+ input: "Mul_1017"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1018/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1018"
+ op: "Mul"
+ input: "Mul_1018/x"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_188"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_188"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1019/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1019"
+ op: "Mul"
+ input: "Mul_1019/x"
+ input: "Square_188"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_639"
+ op: "Add"
+ input: "Mul_1018"
+ input: "Mul_1019"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_188"
+ op: "Sqrt"
+ input: "add_639"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_640/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_640"
+ op: "Add"
+ input: "Sqrt_188"
+ input: "add_640/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_189"
+ op: "RealDiv"
+ input: "add_638"
+ input: "add_640"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1020"
+ op: "Mul"
+ input: "add"
+ input: "truediv_189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_189"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/read"
+ input: "mul_1020"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_763"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/bias"
+ input: "sub_189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_764"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ input: "add_638"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_765"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ input: "add_639"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1021/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1021"
+ op: "Mul"
+ input: "Mul_1021/x"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1022/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1022"
+ op: "Mul"
+ input: "Mul_1022/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_641"
+ op: "Add"
+ input: "Mul_1021"
+ input: "Mul_1022"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1023/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1023"
+ op: "Mul"
+ input: "Mul_1023/x"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_189"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1024/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1024"
+ op: "Mul"
+ input: "Mul_1024/x"
+ input: "Square_189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_642"
+ op: "Add"
+ input: "Mul_1023"
+ input: "Mul_1024"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_189"
+ op: "Sqrt"
+ input: "add_642"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_643/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_643"
+ op: "Add"
+ input: "Sqrt_189"
+ input: "add_643/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_190"
+ op: "RealDiv"
+ input: "add_641"
+ input: "add_643"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1025"
+ op: "Mul"
+ input: "add"
+ input: "truediv_190"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_190"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/read"
+ input: "mul_1025"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_766"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ input: "sub_190"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_767"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ input: "add_641"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_768"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ input: "add_642"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1026/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1026"
+ op: "Mul"
+ input: "Mul_1026/x"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1027/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1027"
+ op: "Mul"
+ input: "Mul_1027/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_190"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_644"
+ op: "Add"
+ input: "Mul_1026"
+ input: "Mul_1027"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1028/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1028"
+ op: "Mul"
+ input: "Mul_1028/x"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_190"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_190"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1029/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1029"
+ op: "Mul"
+ input: "Mul_1029/x"
+ input: "Square_190"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_645"
+ op: "Add"
+ input: "Mul_1028"
+ input: "Mul_1029"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_190"
+ op: "Sqrt"
+ input: "add_645"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_646/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_646"
+ op: "Add"
+ input: "Sqrt_190"
+ input: "add_646/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_191"
+ op: "RealDiv"
+ input: "add_644"
+ input: "add_646"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1030"
+ op: "Mul"
+ input: "add"
+ input: "truediv_191"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_191"
+ op: "Sub"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read"
+ input: "mul_1030"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_769"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ input: "sub_191"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_770"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ input: "add_644"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_771"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ input: "add_645"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\014\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1031/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1031"
+ op: "Mul"
+ input: "Mul_1031/x"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1032/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1032"
+ op: "Mul"
+ input: "Mul_1032/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_191"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_647"
+ op: "Add"
+ input: "Mul_1031"
+ input: "Mul_1032"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1033/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1033"
+ op: "Mul"
+ input: "Mul_1033/x"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_191"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_191"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1034/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1034"
+ op: "Mul"
+ input: "Mul_1034/x"
+ input: "Square_191"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_648"
+ op: "Add"
+ input: "Mul_1033"
+ input: "Mul_1034"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_191"
+ op: "Sqrt"
+ input: "add_648"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_649/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_649"
+ op: "Add"
+ input: "Sqrt_191"
+ input: "add_649/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_192"
+ op: "RealDiv"
+ input: "add_647"
+ input: "add_649"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1035/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_1035"
+ op: "Mul"
+ input: "mul_1035/x"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_650"
+ op: "Add"
+ input: "truediv_192"
+ input: "mul_1035"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1036"
+ op: "Mul"
+ input: "add"
+ input: "add_650"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_192"
+ op: "Sub"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/read"
+ input: "mul_1036"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_772"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel"
+ input: "sub_192"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_773"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ input: "add_647"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_774"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ input: "add_648"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 3072
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1037/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1037"
+ op: "Mul"
+ input: "Mul_1037/x"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1038/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1038"
+ op: "Mul"
+ input: "Mul_1038/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_192"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_651"
+ op: "Add"
+ input: "Mul_1037"
+ input: "Mul_1038"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1039/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1039"
+ op: "Mul"
+ input: "Mul_1039/x"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_192"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_192"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1040/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1040"
+ op: "Mul"
+ input: "Mul_1040/x"
+ input: "Square_192"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_652"
+ op: "Add"
+ input: "Mul_1039"
+ input: "Mul_1040"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_192"
+ op: "Sqrt"
+ input: "add_652"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_653/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_653"
+ op: "Add"
+ input: "Sqrt_192"
+ input: "add_653/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_193"
+ op: "RealDiv"
+ input: "add_651"
+ input: "add_653"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1041"
+ op: "Mul"
+ input: "add"
+ input: "truediv_193"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_193"
+ op: "Sub"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/read"
+ input: "mul_1041"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_775"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/bias"
+ input: "sub_193"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_776"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ input: "add_651"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_777"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ input: "add_652"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\014\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1042/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1042"
+ op: "Mul"
+ input: "Mul_1042/x"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1043/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1043"
+ op: "Mul"
+ input: "Mul_1043/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_193"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_654"
+ op: "Add"
+ input: "Mul_1042"
+ input: "Mul_1043"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1044/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1044"
+ op: "Mul"
+ input: "Mul_1044/x"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_193"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_193"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1045/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1045"
+ op: "Mul"
+ input: "Mul_1045/x"
+ input: "Square_193"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_655"
+ op: "Add"
+ input: "Mul_1044"
+ input: "Mul_1045"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_193"
+ op: "Sqrt"
+ input: "add_655"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_656/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_656"
+ op: "Add"
+ input: "Sqrt_193"
+ input: "add_656/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_194"
+ op: "RealDiv"
+ input: "add_654"
+ input: "add_656"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1046/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_1046"
+ op: "Mul"
+ input: "mul_1046/x"
+ input: "bert/encoder/layer_11/output/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_657"
+ op: "Add"
+ input: "truediv_194"
+ input: "mul_1046"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1047"
+ op: "Mul"
+ input: "add"
+ input: "add_657"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_194"
+ op: "Sub"
+ input: "bert/encoder/layer_11/output/dense/kernel/read"
+ input: "mul_1047"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_778"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/kernel"
+ input: "sub_194"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_779"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ input: "add_654"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_780"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ input: "add_655"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1048/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1048"
+ op: "Mul"
+ input: "Mul_1048/x"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1049/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1049"
+ op: "Mul"
+ input: "Mul_1049/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_194"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_658"
+ op: "Add"
+ input: "Mul_1048"
+ input: "Mul_1049"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1050/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1050"
+ op: "Mul"
+ input: "Mul_1050/x"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_194"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_194"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1051/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1051"
+ op: "Mul"
+ input: "Mul_1051/x"
+ input: "Square_194"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_659"
+ op: "Add"
+ input: "Mul_1050"
+ input: "Mul_1051"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_194"
+ op: "Sqrt"
+ input: "add_659"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_660/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_660"
+ op: "Add"
+ input: "Sqrt_194"
+ input: "add_660/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_195"
+ op: "RealDiv"
+ input: "add_658"
+ input: "add_660"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1052"
+ op: "Mul"
+ input: "add"
+ input: "truediv_195"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_195"
+ op: "Sub"
+ input: "bert/encoder/layer_11/output/dense/bias/read"
+ input: "mul_1052"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_781"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/bias"
+ input: "sub_195"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_782"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ input: "add_658"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_783"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ input: "add_659"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1053/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1053"
+ op: "Mul"
+ input: "Mul_1053/x"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1054/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1054"
+ op: "Mul"
+ input: "Mul_1054/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_195"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_661"
+ op: "Add"
+ input: "Mul_1053"
+ input: "Mul_1054"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1055/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1055"
+ op: "Mul"
+ input: "Mul_1055/x"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_195"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_195"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1056/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1056"
+ op: "Mul"
+ input: "Mul_1056/x"
+ input: "Square_195"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_662"
+ op: "Add"
+ input: "Mul_1055"
+ input: "Mul_1056"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_195"
+ op: "Sqrt"
+ input: "add_662"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_663/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_663"
+ op: "Add"
+ input: "Sqrt_195"
+ input: "add_663/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_196"
+ op: "RealDiv"
+ input: "add_661"
+ input: "add_663"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1057"
+ op: "Mul"
+ input: "add"
+ input: "truediv_196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_196"
+ op: "Sub"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/read"
+ input: "mul_1057"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_784"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta"
+ input: "sub_196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_785"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ input: "add_661"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_786"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ input: "add_662"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Assign"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/read"
+ op: "Identity"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1058/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1058"
+ op: "Mul"
+ input: "Mul_1058/x"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1059/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1059"
+ op: "Mul"
+ input: "Mul_1059/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_664"
+ op: "Add"
+ input: "Mul_1058"
+ input: "Mul_1059"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1060/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1060"
+ op: "Mul"
+ input: "Mul_1060/x"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_196"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1061/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1061"
+ op: "Mul"
+ input: "Mul_1061/x"
+ input: "Square_196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_665"
+ op: "Add"
+ input: "Mul_1060"
+ input: "Mul_1061"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_196"
+ op: "Sqrt"
+ input: "add_665"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_666/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_666"
+ op: "Add"
+ input: "Sqrt_196"
+ input: "add_666/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_197"
+ op: "RealDiv"
+ input: "add_664"
+ input: "add_666"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1062"
+ op: "Mul"
+ input: "add"
+ input: "truediv_197"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_197"
+ op: "Sub"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/read"
+ input: "mul_1062"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_787"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ input: "sub_197"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_788"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ input: "add_664"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_789"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ input: "add_665"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_m/Assign"
+ op: "Assign"
+ input: "bert/pooler/dense/kernel/adam_m"
+ input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_m/read"
+ op: "Identity"
+ input: "bert/pooler/dense/kernel/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\000\003\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_v/Assign"
+ op: "Assign"
+ input: "bert/pooler/dense/kernel/adam_v"
+ input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/kernel/adam_v/read"
+ op: "Identity"
+ input: "bert/pooler/dense/kernel/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1063/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1063"
+ op: "Mul"
+ input: "Mul_1063/x"
+ input: "bert/pooler/dense/kernel/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1064/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1064"
+ op: "Mul"
+ input: "Mul_1064/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_197"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_667"
+ op: "Add"
+ input: "Mul_1063"
+ input: "Mul_1064"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1065/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1065"
+ op: "Mul"
+ input: "Mul_1065/x"
+ input: "bert/pooler/dense/kernel/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_197"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_197"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1066/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1066"
+ op: "Mul"
+ input: "Mul_1066/x"
+ input: "Square_197"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_668"
+ op: "Add"
+ input: "Mul_1065"
+ input: "Mul_1066"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_197"
+ op: "Sqrt"
+ input: "add_668"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_669/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_669"
+ op: "Add"
+ input: "Sqrt_197"
+ input: "add_669/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_198"
+ op: "RealDiv"
+ input: "add_667"
+ input: "add_669"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1067/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_1067"
+ op: "Mul"
+ input: "mul_1067/x"
+ input: "bert/pooler/dense/kernel/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_670"
+ op: "Add"
+ input: "truediv_198"
+ input: "mul_1067"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1068"
+ op: "Mul"
+ input: "add"
+ input: "add_670"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_198"
+ op: "Sub"
+ input: "bert/pooler/dense/kernel/read"
+ input: "mul_1068"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_790"
+ op: "Assign"
+ input: "bert/pooler/dense/kernel"
+ input: "sub_198"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_791"
+ op: "Assign"
+ input: "bert/pooler/dense/kernel/adam_m"
+ input: "add_667"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_792"
+ op: "Assign"
+ input: "bert/pooler/dense/kernel/adam_v"
+ input: "add_668"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/adam_m/Assign"
+ op: "Assign"
+ input: "bert/pooler/dense/bias/adam_m"
+ input: "bert/pooler/dense/bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/adam_m/read"
+ op: "Identity"
+ input: "bert/pooler/dense/bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 768
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/adam_v/Assign"
+ op: "Assign"
+ input: "bert/pooler/dense/bias/adam_v"
+ input: "bert/pooler/dense/bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "bert/pooler/dense/bias/adam_v/read"
+ op: "Identity"
+ input: "bert/pooler/dense/bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1069/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1069"
+ op: "Mul"
+ input: "Mul_1069/x"
+ input: "bert/pooler/dense/bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1070/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1070"
+ op: "Mul"
+ input: "Mul_1070/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_198"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_671"
+ op: "Add"
+ input: "Mul_1069"
+ input: "Mul_1070"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1071/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1071"
+ op: "Mul"
+ input: "Mul_1071/x"
+ input: "bert/pooler/dense/bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_198"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_198"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1072/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1072"
+ op: "Mul"
+ input: "Mul_1072/x"
+ input: "Square_198"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_672"
+ op: "Add"
+ input: "Mul_1071"
+ input: "Mul_1072"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_198"
+ op: "Sqrt"
+ input: "add_672"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_673/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_673"
+ op: "Add"
+ input: "Sqrt_198"
+ input: "add_673/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_199"
+ op: "RealDiv"
+ input: "add_671"
+ input: "add_673"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1073"
+ op: "Mul"
+ input: "add"
+ input: "truediv_199"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_199"
+ op: "Sub"
+ input: "bert/pooler/dense/bias/read"
+ input: "mul_1073"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_793"
+ op: "Assign"
+ input: "bert/pooler/dense/bias"
+ input: "sub_199"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_794"
+ op: "Assign"
+ input: "bert/pooler/dense/bias/adam_m"
+ input: "add_671"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_795"
+ op: "Assign"
+ input: "bert/pooler/dense/bias/adam_v"
+ input: "add_672"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "output_weights/adam_m/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\003\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "output_weights/adam_m/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "output_weights/adam_m/Initializer/zeros"
+ op: "Fill"
+ input: "output_weights/adam_m/Initializer/zeros/shape_as_tensor"
+ input: "output_weights/adam_m/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "output_weights/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "output_weights/adam_m/Assign"
+ op: "Assign"
+ input: "output_weights/adam_m"
+ input: "output_weights/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "output_weights/adam_m/read"
+ op: "Identity"
+ input: "output_weights/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "output_weights/adam_v/Initializer/zeros/shape_as_tensor"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ }
+ tensor_content: "\003\000\000\000\000\003\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "output_weights/adam_v/Initializer/zeros/Const"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "output_weights/adam_v/Initializer/zeros"
+ op: "Fill"
+ input: "output_weights/adam_v/Initializer/zeros/shape_as_tensor"
+ input: "output_weights/adam_v/Initializer/zeros/Const"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "index_type"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+node {
+ name: "output_weights/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "output_weights/adam_v/Assign"
+ op: "Assign"
+ input: "output_weights/adam_v"
+ input: "output_weights/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "output_weights/adam_v/read"
+ op: "Identity"
+ input: "output_weights/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1074/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1074"
+ op: "Mul"
+ input: "Mul_1074/x"
+ input: "output_weights/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1075/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1075"
+ op: "Mul"
+ input: "Mul_1075/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_199"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_674"
+ op: "Add"
+ input: "Mul_1074"
+ input: "Mul_1075"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1076/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1076"
+ op: "Mul"
+ input: "Mul_1076/x"
+ input: "output_weights/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_199"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_199"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1077/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1077"
+ op: "Mul"
+ input: "Mul_1077/x"
+ input: "Square_199"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_675"
+ op: "Add"
+ input: "Mul_1076"
+ input: "Mul_1077"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_199"
+ op: "Sqrt"
+ input: "add_675"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_676/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_676"
+ op: "Add"
+ input: "Sqrt_199"
+ input: "add_676/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_200"
+ op: "RealDiv"
+ input: "add_674"
+ input: "add_676"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1078/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+}
+node {
+ name: "mul_1078"
+ op: "Mul"
+ input: "mul_1078/x"
+ input: "output_weights/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_677"
+ op: "Add"
+ input: "truediv_200"
+ input: "mul_1078"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1079"
+ op: "Mul"
+ input: "add"
+ input: "add_677"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_200"
+ op: "Sub"
+ input: "output_weights/read"
+ input: "mul_1079"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_796"
+ op: "Assign"
+ input: "output_weights"
+ input: "sub_200"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_797"
+ op: "Assign"
+ input: "output_weights/adam_m"
+ input: "add_674"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_798"
+ op: "Assign"
+ input: "output_weights/adam_v"
+ input: "add_675"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "output_bias/adam_m/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "output_bias/adam_m"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "output_bias/adam_m/Assign"
+ op: "Assign"
+ input: "output_bias/adam_m"
+ input: "output_bias/adam_m/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "output_bias/adam_m/read"
+ op: "Identity"
+ input: "output_bias/adam_m"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "output_bias/adam_v/Initializer/zeros"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 3
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "output_bias/adam_v"
+ op: "VariableV2"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "output_bias/adam_v/Assign"
+ op: "Assign"
+ input: "output_bias/adam_v"
+ input: "output_bias/adam_v/Initializer/zeros"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "output_bias/adam_v/read"
+ op: "Identity"
+ input: "output_bias/adam_v"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1080/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.8999999761581421
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1080"
+ op: "Mul"
+ input: "Mul_1080/x"
+ input: "output_bias/adam_m/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1081/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.10000000149011612
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1081"
+ op: "Mul"
+ input: "Mul_1081/x"
+ input: "clip_by_global_norm/clip_by_global_norm/_200"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_678"
+ op: "Add"
+ input: "Mul_1080"
+ input: "Mul_1081"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1082/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.9990000128746033
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1082"
+ op: "Mul"
+ input: "Mul_1082/x"
+ input: "output_bias/adam_v/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Square_200"
+ op: "Square"
+ input: "clip_by_global_norm/clip_by_global_norm/_200"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1083/x"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000474974513
+ }
+ }
+ }
+}
+node {
+ name: "Mul_1083"
+ op: "Mul"
+ input: "Mul_1083/x"
+ input: "Square_200"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_679"
+ op: "Add"
+ input: "Mul_1082"
+ input: "Mul_1083"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Sqrt_200"
+ op: "Sqrt"
+ input: "add_679"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "add_680/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 9.999999974752427e-07
+ }
+ }
+ }
+}
+node {
+ name: "add_680"
+ op: "Add"
+ input: "Sqrt_200"
+ input: "add_680/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "truediv_201"
+ op: "RealDiv"
+ input: "add_678"
+ input: "add_680"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "mul_1084"
+ op: "Mul"
+ input: "add"
+ input: "truediv_201"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "sub_201"
+ op: "Sub"
+ input: "output_bias/read"
+ input: "mul_1084"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "Assign_799"
+ op: "Assign"
+ input: "output_bias"
+ input: "sub_201"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_800"
+ op: "Assign"
+ input: "output_bias/adam_m"
+ input: "add_678"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "Assign_801"
+ op: "Assign"
+ input: "output_bias/adam_v"
+ input: "add_679"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: false
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "group_deps"
+ op: "NoOp"
+ input: "^Assign_199"
+ input: "^Assign_200"
+ input: "^Assign_201"
+ input: "^Assign_202"
+ input: "^Assign_203"
+ input: "^Assign_204"
+ input: "^Assign_205"
+ input: "^Assign_206"
+ input: "^Assign_207"
+ input: "^Assign_208"
+ input: "^Assign_209"
+ input: "^Assign_210"
+ input: "^Assign_211"
+ input: "^Assign_212"
+ input: "^Assign_213"
+ input: "^Assign_214"
+ input: "^Assign_215"
+ input: "^Assign_216"
+ input: "^Assign_217"
+ input: "^Assign_218"
+ input: "^Assign_219"
+ input: "^Assign_220"
+ input: "^Assign_221"
+ input: "^Assign_222"
+ input: "^Assign_223"
+ input: "^Assign_224"
+ input: "^Assign_225"
+ input: "^Assign_226"
+ input: "^Assign_227"
+ input: "^Assign_228"
+ input: "^Assign_229"
+ input: "^Assign_230"
+ input: "^Assign_231"
+ input: "^Assign_232"
+ input: "^Assign_233"
+ input: "^Assign_234"
+ input: "^Assign_235"
+ input: "^Assign_236"
+ input: "^Assign_237"
+ input: "^Assign_238"
+ input: "^Assign_239"
+ input: "^Assign_240"
+ input: "^Assign_241"
+ input: "^Assign_242"
+ input: "^Assign_243"
+ input: "^Assign_244"
+ input: "^Assign_245"
+ input: "^Assign_246"
+ input: "^Assign_247"
+ input: "^Assign_248"
+ input: "^Assign_249"
+ input: "^Assign_250"
+ input: "^Assign_251"
+ input: "^Assign_252"
+ input: "^Assign_253"
+ input: "^Assign_254"
+ input: "^Assign_255"
+ input: "^Assign_256"
+ input: "^Assign_257"
+ input: "^Assign_258"
+ input: "^Assign_259"
+ input: "^Assign_260"
+ input: "^Assign_261"
+ input: "^Assign_262"
+ input: "^Assign_263"
+ input: "^Assign_264"
+ input: "^Assign_265"
+ input: "^Assign_266"
+ input: "^Assign_267"
+ input: "^Assign_268"
+ input: "^Assign_269"
+ input: "^Assign_270"
+ input: "^Assign_271"
+ input: "^Assign_272"
+ input: "^Assign_273"
+ input: "^Assign_274"
+ input: "^Assign_275"
+ input: "^Assign_276"
+ input: "^Assign_277"
+ input: "^Assign_278"
+ input: "^Assign_279"
+ input: "^Assign_280"
+ input: "^Assign_281"
+ input: "^Assign_282"
+ input: "^Assign_283"
+ input: "^Assign_284"
+ input: "^Assign_285"
+ input: "^Assign_286"
+ input: "^Assign_287"
+ input: "^Assign_288"
+ input: "^Assign_289"
+ input: "^Assign_290"
+ input: "^Assign_291"
+ input: "^Assign_292"
+ input: "^Assign_293"
+ input: "^Assign_294"
+ input: "^Assign_295"
+ input: "^Assign_296"
+ input: "^Assign_297"
+ input: "^Assign_298"
+ input: "^Assign_299"
+ input: "^Assign_300"
+ input: "^Assign_301"
+ input: "^Assign_302"
+ input: "^Assign_303"
+ input: "^Assign_304"
+ input: "^Assign_305"
+ input: "^Assign_306"
+ input: "^Assign_307"
+ input: "^Assign_308"
+ input: "^Assign_309"
+ input: "^Assign_310"
+ input: "^Assign_311"
+ input: "^Assign_312"
+ input: "^Assign_313"
+ input: "^Assign_314"
+ input: "^Assign_315"
+ input: "^Assign_316"
+ input: "^Assign_317"
+ input: "^Assign_318"
+ input: "^Assign_319"
+ input: "^Assign_320"
+ input: "^Assign_321"
+ input: "^Assign_322"
+ input: "^Assign_323"
+ input: "^Assign_324"
+ input: "^Assign_325"
+ input: "^Assign_326"
+ input: "^Assign_327"
+ input: "^Assign_328"
+ input: "^Assign_329"
+ input: "^Assign_330"
+ input: "^Assign_331"
+ input: "^Assign_332"
+ input: "^Assign_333"
+ input: "^Assign_334"
+ input: "^Assign_335"
+ input: "^Assign_336"
+ input: "^Assign_337"
+ input: "^Assign_338"
+ input: "^Assign_339"
+ input: "^Assign_340"
+ input: "^Assign_341"
+ input: "^Assign_342"
+ input: "^Assign_343"
+ input: "^Assign_344"
+ input: "^Assign_345"
+ input: "^Assign_346"
+ input: "^Assign_347"
+ input: "^Assign_348"
+ input: "^Assign_349"
+ input: "^Assign_350"
+ input: "^Assign_351"
+ input: "^Assign_352"
+ input: "^Assign_353"
+ input: "^Assign_354"
+ input: "^Assign_355"
+ input: "^Assign_356"
+ input: "^Assign_357"
+ input: "^Assign_358"
+ input: "^Assign_359"
+ input: "^Assign_360"
+ input: "^Assign_361"
+ input: "^Assign_362"
+ input: "^Assign_363"
+ input: "^Assign_364"
+ input: "^Assign_365"
+ input: "^Assign_366"
+ input: "^Assign_367"
+ input: "^Assign_368"
+ input: "^Assign_369"
+ input: "^Assign_370"
+ input: "^Assign_371"
+ input: "^Assign_372"
+ input: "^Assign_373"
+ input: "^Assign_374"
+ input: "^Assign_375"
+ input: "^Assign_376"
+ input: "^Assign_377"
+ input: "^Assign_378"
+ input: "^Assign_379"
+ input: "^Assign_380"
+ input: "^Assign_381"
+ input: "^Assign_382"
+ input: "^Assign_383"
+ input: "^Assign_384"
+ input: "^Assign_385"
+ input: "^Assign_386"
+ input: "^Assign_387"
+ input: "^Assign_388"
+ input: "^Assign_389"
+ input: "^Assign_390"
+ input: "^Assign_391"
+ input: "^Assign_392"
+ input: "^Assign_393"
+ input: "^Assign_394"
+ input: "^Assign_395"
+ input: "^Assign_396"
+ input: "^Assign_397"
+ input: "^Assign_398"
+ input: "^Assign_399"
+ input: "^Assign_400"
+ input: "^Assign_401"
+ input: "^Assign_402"
+ input: "^Assign_403"
+ input: "^Assign_404"
+ input: "^Assign_405"
+ input: "^Assign_406"
+ input: "^Assign_407"
+ input: "^Assign_408"
+ input: "^Assign_409"
+ input: "^Assign_410"
+ input: "^Assign_411"
+ input: "^Assign_412"
+ input: "^Assign_413"
+ input: "^Assign_414"
+ input: "^Assign_415"
+ input: "^Assign_416"
+ input: "^Assign_417"
+ input: "^Assign_418"
+ input: "^Assign_419"
+ input: "^Assign_420"
+ input: "^Assign_421"
+ input: "^Assign_422"
+ input: "^Assign_423"
+ input: "^Assign_424"
+ input: "^Assign_425"
+ input: "^Assign_426"
+ input: "^Assign_427"
+ input: "^Assign_428"
+ input: "^Assign_429"
+ input: "^Assign_430"
+ input: "^Assign_431"
+ input: "^Assign_432"
+ input: "^Assign_433"
+ input: "^Assign_434"
+ input: "^Assign_435"
+ input: "^Assign_436"
+ input: "^Assign_437"
+ input: "^Assign_438"
+ input: "^Assign_439"
+ input: "^Assign_440"
+ input: "^Assign_441"
+ input: "^Assign_442"
+ input: "^Assign_443"
+ input: "^Assign_444"
+ input: "^Assign_445"
+ input: "^Assign_446"
+ input: "^Assign_447"
+ input: "^Assign_448"
+ input: "^Assign_449"
+ input: "^Assign_450"
+ input: "^Assign_451"
+ input: "^Assign_452"
+ input: "^Assign_453"
+ input: "^Assign_454"
+ input: "^Assign_455"
+ input: "^Assign_456"
+ input: "^Assign_457"
+ input: "^Assign_458"
+ input: "^Assign_459"
+ input: "^Assign_460"
+ input: "^Assign_461"
+ input: "^Assign_462"
+ input: "^Assign_463"
+ input: "^Assign_464"
+ input: "^Assign_465"
+ input: "^Assign_466"
+ input: "^Assign_467"
+ input: "^Assign_468"
+ input: "^Assign_469"
+ input: "^Assign_470"
+ input: "^Assign_471"
+ input: "^Assign_472"
+ input: "^Assign_473"
+ input: "^Assign_474"
+ input: "^Assign_475"
+ input: "^Assign_476"
+ input: "^Assign_477"
+ input: "^Assign_478"
+ input: "^Assign_479"
+ input: "^Assign_480"
+ input: "^Assign_481"
+ input: "^Assign_482"
+ input: "^Assign_483"
+ input: "^Assign_484"
+ input: "^Assign_485"
+ input: "^Assign_486"
+ input: "^Assign_487"
+ input: "^Assign_488"
+ input: "^Assign_489"
+ input: "^Assign_490"
+ input: "^Assign_491"
+ input: "^Assign_492"
+ input: "^Assign_493"
+ input: "^Assign_494"
+ input: "^Assign_495"
+ input: "^Assign_496"
+ input: "^Assign_497"
+ input: "^Assign_498"
+ input: "^Assign_499"
+ input: "^Assign_500"
+ input: "^Assign_501"
+ input: "^Assign_502"
+ input: "^Assign_503"
+ input: "^Assign_504"
+ input: "^Assign_505"
+ input: "^Assign_506"
+ input: "^Assign_507"
+ input: "^Assign_508"
+ input: "^Assign_509"
+ input: "^Assign_510"
+ input: "^Assign_511"
+ input: "^Assign_512"
+ input: "^Assign_513"
+ input: "^Assign_514"
+ input: "^Assign_515"
+ input: "^Assign_516"
+ input: "^Assign_517"
+ input: "^Assign_518"
+ input: "^Assign_519"
+ input: "^Assign_520"
+ input: "^Assign_521"
+ input: "^Assign_522"
+ input: "^Assign_523"
+ input: "^Assign_524"
+ input: "^Assign_525"
+ input: "^Assign_526"
+ input: "^Assign_527"
+ input: "^Assign_528"
+ input: "^Assign_529"
+ input: "^Assign_530"
+ input: "^Assign_531"
+ input: "^Assign_532"
+ input: "^Assign_533"
+ input: "^Assign_534"
+ input: "^Assign_535"
+ input: "^Assign_536"
+ input: "^Assign_537"
+ input: "^Assign_538"
+ input: "^Assign_539"
+ input: "^Assign_540"
+ input: "^Assign_541"
+ input: "^Assign_542"
+ input: "^Assign_543"
+ input: "^Assign_544"
+ input: "^Assign_545"
+ input: "^Assign_546"
+ input: "^Assign_547"
+ input: "^Assign_548"
+ input: "^Assign_549"
+ input: "^Assign_550"
+ input: "^Assign_551"
+ input: "^Assign_552"
+ input: "^Assign_553"
+ input: "^Assign_554"
+ input: "^Assign_555"
+ input: "^Assign_556"
+ input: "^Assign_557"
+ input: "^Assign_558"
+ input: "^Assign_559"
+ input: "^Assign_560"
+ input: "^Assign_561"
+ input: "^Assign_562"
+ input: "^Assign_563"
+ input: "^Assign_564"
+ input: "^Assign_565"
+ input: "^Assign_566"
+ input: "^Assign_567"
+ input: "^Assign_568"
+ input: "^Assign_569"
+ input: "^Assign_570"
+ input: "^Assign_571"
+ input: "^Assign_572"
+ input: "^Assign_573"
+ input: "^Assign_574"
+ input: "^Assign_575"
+ input: "^Assign_576"
+ input: "^Assign_577"
+ input: "^Assign_578"
+ input: "^Assign_579"
+ input: "^Assign_580"
+ input: "^Assign_581"
+ input: "^Assign_582"
+ input: "^Assign_583"
+ input: "^Assign_584"
+ input: "^Assign_585"
+ input: "^Assign_586"
+ input: "^Assign_587"
+ input: "^Assign_588"
+ input: "^Assign_589"
+ input: "^Assign_590"
+ input: "^Assign_591"
+ input: "^Assign_592"
+ input: "^Assign_593"
+ input: "^Assign_594"
+ input: "^Assign_595"
+ input: "^Assign_596"
+ input: "^Assign_597"
+ input: "^Assign_598"
+ input: "^Assign_599"
+ input: "^Assign_600"
+ input: "^Assign_601"
+ input: "^Assign_602"
+ input: "^Assign_603"
+ input: "^Assign_604"
+ input: "^Assign_605"
+ input: "^Assign_606"
+ input: "^Assign_607"
+ input: "^Assign_608"
+ input: "^Assign_609"
+ input: "^Assign_610"
+ input: "^Assign_611"
+ input: "^Assign_612"
+ input: "^Assign_613"
+ input: "^Assign_614"
+ input: "^Assign_615"
+ input: "^Assign_616"
+ input: "^Assign_617"
+ input: "^Assign_618"
+ input: "^Assign_619"
+ input: "^Assign_620"
+ input: "^Assign_621"
+ input: "^Assign_622"
+ input: "^Assign_623"
+ input: "^Assign_624"
+ input: "^Assign_625"
+ input: "^Assign_626"
+ input: "^Assign_627"
+ input: "^Assign_628"
+ input: "^Assign_629"
+ input: "^Assign_630"
+ input: "^Assign_631"
+ input: "^Assign_632"
+ input: "^Assign_633"
+ input: "^Assign_634"
+ input: "^Assign_635"
+ input: "^Assign_636"
+ input: "^Assign_637"
+ input: "^Assign_638"
+ input: "^Assign_639"
+ input: "^Assign_640"
+ input: "^Assign_641"
+ input: "^Assign_642"
+ input: "^Assign_643"
+ input: "^Assign_644"
+ input: "^Assign_645"
+ input: "^Assign_646"
+ input: "^Assign_647"
+ input: "^Assign_648"
+ input: "^Assign_649"
+ input: "^Assign_650"
+ input: "^Assign_651"
+ input: "^Assign_652"
+ input: "^Assign_653"
+ input: "^Assign_654"
+ input: "^Assign_655"
+ input: "^Assign_656"
+ input: "^Assign_657"
+ input: "^Assign_658"
+ input: "^Assign_659"
+ input: "^Assign_660"
+ input: "^Assign_661"
+ input: "^Assign_662"
+ input: "^Assign_663"
+ input: "^Assign_664"
+ input: "^Assign_665"
+ input: "^Assign_666"
+ input: "^Assign_667"
+ input: "^Assign_668"
+ input: "^Assign_669"
+ input: "^Assign_670"
+ input: "^Assign_671"
+ input: "^Assign_672"
+ input: "^Assign_673"
+ input: "^Assign_674"
+ input: "^Assign_675"
+ input: "^Assign_676"
+ input: "^Assign_677"
+ input: "^Assign_678"
+ input: "^Assign_679"
+ input: "^Assign_680"
+ input: "^Assign_681"
+ input: "^Assign_682"
+ input: "^Assign_683"
+ input: "^Assign_684"
+ input: "^Assign_685"
+ input: "^Assign_686"
+ input: "^Assign_687"
+ input: "^Assign_688"
+ input: "^Assign_689"
+ input: "^Assign_690"
+ input: "^Assign_691"
+ input: "^Assign_692"
+ input: "^Assign_693"
+ input: "^Assign_694"
+ input: "^Assign_695"
+ input: "^Assign_696"
+ input: "^Assign_697"
+ input: "^Assign_698"
+ input: "^Assign_699"
+ input: "^Assign_700"
+ input: "^Assign_701"
+ input: "^Assign_702"
+ input: "^Assign_703"
+ input: "^Assign_704"
+ input: "^Assign_705"
+ input: "^Assign_706"
+ input: "^Assign_707"
+ input: "^Assign_708"
+ input: "^Assign_709"
+ input: "^Assign_710"
+ input: "^Assign_711"
+ input: "^Assign_712"
+ input: "^Assign_713"
+ input: "^Assign_714"
+ input: "^Assign_715"
+ input: "^Assign_716"
+ input: "^Assign_717"
+ input: "^Assign_718"
+ input: "^Assign_719"
+ input: "^Assign_720"
+ input: "^Assign_721"
+ input: "^Assign_722"
+ input: "^Assign_723"
+ input: "^Assign_724"
+ input: "^Assign_725"
+ input: "^Assign_726"
+ input: "^Assign_727"
+ input: "^Assign_728"
+ input: "^Assign_729"
+ input: "^Assign_730"
+ input: "^Assign_731"
+ input: "^Assign_732"
+ input: "^Assign_733"
+ input: "^Assign_734"
+ input: "^Assign_735"
+ input: "^Assign_736"
+ input: "^Assign_737"
+ input: "^Assign_738"
+ input: "^Assign_739"
+ input: "^Assign_740"
+ input: "^Assign_741"
+ input: "^Assign_742"
+ input: "^Assign_743"
+ input: "^Assign_744"
+ input: "^Assign_745"
+ input: "^Assign_746"
+ input: "^Assign_747"
+ input: "^Assign_748"
+ input: "^Assign_749"
+ input: "^Assign_750"
+ input: "^Assign_751"
+ input: "^Assign_752"
+ input: "^Assign_753"
+ input: "^Assign_754"
+ input: "^Assign_755"
+ input: "^Assign_756"
+ input: "^Assign_757"
+ input: "^Assign_758"
+ input: "^Assign_759"
+ input: "^Assign_760"
+ input: "^Assign_761"
+ input: "^Assign_762"
+ input: "^Assign_763"
+ input: "^Assign_764"
+ input: "^Assign_765"
+ input: "^Assign_766"
+ input: "^Assign_767"
+ input: "^Assign_768"
+ input: "^Assign_769"
+ input: "^Assign_770"
+ input: "^Assign_771"
+ input: "^Assign_772"
+ input: "^Assign_773"
+ input: "^Assign_774"
+ input: "^Assign_775"
+ input: "^Assign_776"
+ input: "^Assign_777"
+ input: "^Assign_778"
+ input: "^Assign_779"
+ input: "^Assign_780"
+ input: "^Assign_781"
+ input: "^Assign_782"
+ input: "^Assign_783"
+ input: "^Assign_784"
+ input: "^Assign_785"
+ input: "^Assign_786"
+ input: "^Assign_787"
+ input: "^Assign_788"
+ input: "^Assign_789"
+ input: "^Assign_790"
+ input: "^Assign_791"
+ input: "^Assign_792"
+ input: "^Assign_793"
+ input: "^Assign_794"
+ input: "^Assign_795"
+ input: "^Assign_796"
+ input: "^Assign_797"
+ input: "^Assign_798"
+ input: "^Assign_799"
+ input: "^Assign_800"
+ input: "^Assign_801"
+}
+node {
+ name: "ReadVariableOp"
+ op: "ReadVariableOp"
+ input: "global_step"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+}
+node {
+ name: "add_681/y"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT64
+ tensor_shape {
+ }
+ int64_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "add_681"
+ op: "Add"
+ input: "ReadVariableOp"
+ input: "add_681/y"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "AssignVariableOp"
+ op: "AssignVariableOp"
+ input: "global_step"
+ input: "add_681"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+}
+node {
+ name: "ReadVariableOp_1"
+ op: "ReadVariableOp"
+ input: "global_step"
+ input: "^AssignVariableOp"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT64
+ }
+ }
+}
+node {
+ name: "group_deps_1"
+ op: "NoOp"
+ input: "^AssignVariableOp"
+ input: "^group_deps"
+}
+node {
+ name: "loss_1/tags"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "loss_1"
+ }
+ }
+ }
+}
+node {
+ name: "loss_1"
+ op: "ScalarSummary"
+ input: "loss_1/tags"
+ input: "loss/Mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "init"
+ op: "NoOp"
+ input: "^Assign"
+ input: "^Assign_1"
+ input: "^Assign_10"
+ input: "^Assign_100"
+ input: "^Assign_101"
+ input: "^Assign_102"
+ input: "^Assign_103"
+ input: "^Assign_104"
+ input: "^Assign_105"
+ input: "^Assign_106"
+ input: "^Assign_107"
+ input: "^Assign_108"
+ input: "^Assign_109"
+ input: "^Assign_11"
+ input: "^Assign_110"
+ input: "^Assign_111"
+ input: "^Assign_112"
+ input: "^Assign_113"
+ input: "^Assign_114"
+ input: "^Assign_115"
+ input: "^Assign_116"
+ input: "^Assign_117"
+ input: "^Assign_118"
+ input: "^Assign_119"
+ input: "^Assign_12"
+ input: "^Assign_120"
+ input: "^Assign_121"
+ input: "^Assign_122"
+ input: "^Assign_123"
+ input: "^Assign_124"
+ input: "^Assign_125"
+ input: "^Assign_126"
+ input: "^Assign_127"
+ input: "^Assign_128"
+ input: "^Assign_129"
+ input: "^Assign_13"
+ input: "^Assign_130"
+ input: "^Assign_131"
+ input: "^Assign_132"
+ input: "^Assign_133"
+ input: "^Assign_134"
+ input: "^Assign_135"
+ input: "^Assign_136"
+ input: "^Assign_137"
+ input: "^Assign_138"
+ input: "^Assign_139"
+ input: "^Assign_14"
+ input: "^Assign_140"
+ input: "^Assign_141"
+ input: "^Assign_142"
+ input: "^Assign_143"
+ input: "^Assign_144"
+ input: "^Assign_145"
+ input: "^Assign_146"
+ input: "^Assign_147"
+ input: "^Assign_148"
+ input: "^Assign_149"
+ input: "^Assign_15"
+ input: "^Assign_150"
+ input: "^Assign_151"
+ input: "^Assign_152"
+ input: "^Assign_153"
+ input: "^Assign_154"
+ input: "^Assign_155"
+ input: "^Assign_156"
+ input: "^Assign_157"
+ input: "^Assign_158"
+ input: "^Assign_159"
+ input: "^Assign_16"
+ input: "^Assign_160"
+ input: "^Assign_161"
+ input: "^Assign_162"
+ input: "^Assign_163"
+ input: "^Assign_164"
+ input: "^Assign_165"
+ input: "^Assign_166"
+ input: "^Assign_167"
+ input: "^Assign_168"
+ input: "^Assign_169"
+ input: "^Assign_17"
+ input: "^Assign_170"
+ input: "^Assign_171"
+ input: "^Assign_172"
+ input: "^Assign_173"
+ input: "^Assign_174"
+ input: "^Assign_175"
+ input: "^Assign_176"
+ input: "^Assign_177"
+ input: "^Assign_178"
+ input: "^Assign_179"
+ input: "^Assign_18"
+ input: "^Assign_180"
+ input: "^Assign_181"
+ input: "^Assign_182"
+ input: "^Assign_183"
+ input: "^Assign_184"
+ input: "^Assign_185"
+ input: "^Assign_186"
+ input: "^Assign_187"
+ input: "^Assign_188"
+ input: "^Assign_189"
+ input: "^Assign_19"
+ input: "^Assign_190"
+ input: "^Assign_191"
+ input: "^Assign_192"
+ input: "^Assign_193"
+ input: "^Assign_194"
+ input: "^Assign_195"
+ input: "^Assign_196"
+ input: "^Assign_197"
+ input: "^Assign_198"
+ input: "^Assign_2"
+ input: "^Assign_20"
+ input: "^Assign_21"
+ input: "^Assign_22"
+ input: "^Assign_23"
+ input: "^Assign_24"
+ input: "^Assign_25"
+ input: "^Assign_26"
+ input: "^Assign_27"
+ input: "^Assign_28"
+ input: "^Assign_29"
+ input: "^Assign_3"
+ input: "^Assign_30"
+ input: "^Assign_31"
+ input: "^Assign_32"
+ input: "^Assign_33"
+ input: "^Assign_34"
+ input: "^Assign_35"
+ input: "^Assign_36"
+ input: "^Assign_37"
+ input: "^Assign_38"
+ input: "^Assign_39"
+ input: "^Assign_4"
+ input: "^Assign_40"
+ input: "^Assign_41"
+ input: "^Assign_42"
+ input: "^Assign_43"
+ input: "^Assign_44"
+ input: "^Assign_45"
+ input: "^Assign_46"
+ input: "^Assign_47"
+ input: "^Assign_48"
+ input: "^Assign_49"
+ input: "^Assign_5"
+ input: "^Assign_50"
+ input: "^Assign_51"
+ input: "^Assign_52"
+ input: "^Assign_53"
+ input: "^Assign_54"
+ input: "^Assign_55"
+ input: "^Assign_56"
+ input: "^Assign_57"
+ input: "^Assign_58"
+ input: "^Assign_59"
+ input: "^Assign_6"
+ input: "^Assign_60"
+ input: "^Assign_61"
+ input: "^Assign_62"
+ input: "^Assign_63"
+ input: "^Assign_64"
+ input: "^Assign_65"
+ input: "^Assign_66"
+ input: "^Assign_67"
+ input: "^Assign_68"
+ input: "^Assign_69"
+ input: "^Assign_7"
+ input: "^Assign_70"
+ input: "^Assign_71"
+ input: "^Assign_72"
+ input: "^Assign_73"
+ input: "^Assign_74"
+ input: "^Assign_75"
+ input: "^Assign_76"
+ input: "^Assign_77"
+ input: "^Assign_78"
+ input: "^Assign_79"
+ input: "^Assign_8"
+ input: "^Assign_80"
+ input: "^Assign_81"
+ input: "^Assign_82"
+ input: "^Assign_83"
+ input: "^Assign_84"
+ input: "^Assign_85"
+ input: "^Assign_86"
+ input: "^Assign_87"
+ input: "^Assign_88"
+ input: "^Assign_89"
+ input: "^Assign_9"
+ input: "^Assign_90"
+ input: "^Assign_91"
+ input: "^Assign_92"
+ input: "^Assign_93"
+ input: "^Assign_94"
+ input: "^Assign_95"
+ input: "^Assign_96"
+ input: "^Assign_97"
+ input: "^Assign_98"
+ input: "^Assign_99"
+ input: "^bert/embeddings/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/embeddings/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/embeddings/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/embeddings/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/embeddings/position_embeddings/adam_m/Assign"
+ input: "^bert/embeddings/position_embeddings/adam_v/Assign"
+ input: "^bert/embeddings/token_type_embeddings/adam_m/Assign"
+ input: "^bert/embeddings/token_type_embeddings/adam_v/Assign"
+ input: "^bert/embeddings/word_embeddings/adam_m/Assign"
+ input: "^bert/embeddings/word_embeddings/adam_v/Assign"
+ input: "^bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_0/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_0/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_0/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_0/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_0/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_0/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_0/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_0/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_0/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_0/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_0/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_0/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_0/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_0/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_0/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_0/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_0/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_0/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_0/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_0/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_1/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_1/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_1/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_1/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_1/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_1/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_1/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_1/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_1/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_1/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_1/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_1/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_1/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_1/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_1/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_1/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_1/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_1/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_1/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_1/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_10/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_10/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_10/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_10/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_10/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_10/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_10/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_10/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_10/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_10/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_10/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_10/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_10/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_10/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_10/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_10/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_10/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_10/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_10/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_10/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_11/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_11/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_11/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_11/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_11/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_11/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_11/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_11/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_11/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_11/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_11/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_11/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_11/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_11/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_11/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_11/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_11/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_11/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_11/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_11/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_2/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_2/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_2/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_2/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_2/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_2/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_2/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_2/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_2/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_2/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_2/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_2/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_2/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_2/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_2/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_2/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_2/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_2/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_2/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_2/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_3/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_3/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_3/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_3/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_3/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_3/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_3/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_3/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_3/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_3/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_3/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_3/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_3/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_3/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_3/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_3/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_3/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_3/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_3/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_3/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_4/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_4/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_4/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_4/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_4/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_4/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_4/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_4/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_4/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_4/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_4/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_4/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_4/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_4/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_4/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_4/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_4/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_4/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_4/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_4/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_5/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_5/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_5/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_5/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_5/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_5/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_5/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_5/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_5/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_5/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_5/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_5/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_5/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_5/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_5/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_5/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_5/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_5/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_5/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_5/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_6/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_6/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_6/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_6/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_6/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_6/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_6/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_6/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_6/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_6/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_6/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_6/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_6/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_6/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_6/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_6/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_6/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_6/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_6/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_6/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_7/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_7/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_7/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_7/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_7/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_7/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_7/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_7/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_7/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_7/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_7/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_7/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_7/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_7/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_7/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_7/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_7/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_7/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_7/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_7/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_8/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_8/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_8/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_8/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_8/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_8/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_8/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_8/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_8/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_8/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_8/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_8/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_8/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_8/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_8/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_8/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_8/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_8/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_8/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_8/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_9/attention/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_9/attention/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_9/attention/self/key/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_9/attention/self/key/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_9/attention/self/key/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_9/attention/self/key/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_9/attention/self/query/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_9/attention/self/query/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_9/attention/self/query/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_9/attention/self/query/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_9/attention/self/value/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_9/attention/self/value/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_9/attention/self/value/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_9/attention/self/value/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_9/intermediate/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_9/intermediate/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Assign"
+ input: "^bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Assign"
+ input: "^bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Assign"
+ input: "^bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Assign"
+ input: "^bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Assign"
+ input: "^bert/encoder/layer_9/output/dense/bias/adam_m/Assign"
+ input: "^bert/encoder/layer_9/output/dense/bias/adam_v/Assign"
+ input: "^bert/encoder/layer_9/output/dense/kernel/adam_m/Assign"
+ input: "^bert/encoder/layer_9/output/dense/kernel/adam_v/Assign"
+ input: "^bert/pooler/dense/bias/adam_m/Assign"
+ input: "^bert/pooler/dense/bias/adam_v/Assign"
+ input: "^bert/pooler/dense/kernel/adam_m/Assign"
+ input: "^bert/pooler/dense/kernel/adam_v/Assign"
+ input: "^global_step/Assign"
+ input: "^output_bias/Assign"
+ input: "^output_bias/adam_m/Assign"
+ input: "^output_bias/adam_v/Assign"
+ input: "^output_weights/Assign"
+ input: "^output_weights/adam_m/Assign"
+ input: "^output_weights/adam_v/Assign"
+}
+node {
+ name: "init_1"
+ op: "NoOp"
+}
+node {
+ name: "group_deps_2"
+ op: "NoOp"
+ input: "^init"
+ input: "^init_1"
+}
+node {
+ name: "report_uninitialized_variables/VarIsInitializedOp"
+ op: "VarIsInitializedOp"
+ input: "global_step"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/word_embeddings"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_1"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/token_type_embeddings"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_2"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/position_embeddings"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_3"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_4"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_5"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_6"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_7"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_8"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_9"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_10"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_11"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_12"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_13"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_14"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_15"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_16"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_17"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_18"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_19"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_20"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_21"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_22"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_23"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_24"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_25"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_26"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_27"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_28"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_29"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_30"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_31"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_32"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_33"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_34"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_35"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_36"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_37"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_38"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_39"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_40"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_41"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_42"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_43"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_44"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_45"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_46"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_47"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_48"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_49"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_50"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_51"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_52"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_53"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_54"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_55"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_56"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_57"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_58"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_59"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_60"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_61"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_62"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_63"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_64"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_65"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_66"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_67"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_68"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_69"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_70"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_71"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_72"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_73"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_74"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_75"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_76"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_77"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_78"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_79"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_80"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_81"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_82"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_83"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_84"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_85"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_86"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_87"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_88"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_89"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_90"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_91"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_92"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_93"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_94"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_95"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_96"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_97"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_98"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_99"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_100"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_101"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_102"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_103"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_104"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_105"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_106"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_107"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_108"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_109"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_110"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_111"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_112"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_113"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_114"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_115"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_116"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_117"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_118"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_119"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_120"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_121"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_122"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_123"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_124"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_125"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_126"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_127"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_128"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_129"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_130"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_131"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_132"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_133"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_134"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_135"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_136"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_137"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_138"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_139"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_140"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_141"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_142"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_143"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_144"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_145"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_146"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_147"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_148"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_149"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_150"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_151"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_152"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_153"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_154"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_155"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_156"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_157"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_158"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_159"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_160"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_161"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_162"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_163"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_164"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_165"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_166"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_167"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_168"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_169"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_170"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_171"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_172"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_173"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_174"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_175"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_176"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_177"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_178"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_179"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_180"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_181"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_182"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_183"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_184"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_185"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_186"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_187"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_188"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_189"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_190"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_191"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_192"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_193"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_194"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_195"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_196"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_197"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_198"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_199"
+ op: "IsVariableInitialized"
+ input: "output_weights"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_200"
+ op: "IsVariableInitialized"
+ input: "output_bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_201"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/word_embeddings/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_202"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/word_embeddings/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_203"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/token_type_embeddings/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_204"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/token_type_embeddings/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_205"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/position_embeddings/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_206"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/position_embeddings/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_207"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_208"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_209"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_210"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_211"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_212"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_213"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_214"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_215"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_216"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_217"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_218"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_219"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_220"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_221"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_222"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_223"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_224"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_225"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_226"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_227"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_228"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_229"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_230"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_231"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_232"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_233"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_234"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_235"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_236"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_237"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_238"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_239"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_240"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_241"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_242"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_243"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_244"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_245"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_246"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_247"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_248"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_249"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_250"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_251"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_252"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_253"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_254"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_255"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_256"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_257"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_258"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_259"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_260"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_261"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_262"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_263"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_264"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_265"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_266"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_267"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_268"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_269"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_270"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_271"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_272"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_273"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_274"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_275"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_276"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_277"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_278"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_279"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_280"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_281"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_282"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_283"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_284"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_285"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_286"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_287"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_288"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_289"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_290"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_291"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_292"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_293"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_294"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_295"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_296"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_297"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_298"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_299"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_300"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_301"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_302"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_303"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_304"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_305"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_306"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_307"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_308"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_309"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_310"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_311"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_312"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_313"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_314"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_315"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_316"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_317"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_318"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_319"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_320"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_321"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_322"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_323"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_324"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_325"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_326"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_327"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_328"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_329"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_330"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_331"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_332"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_333"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_334"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_335"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_336"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_337"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_338"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_339"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_340"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_341"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_342"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_343"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_344"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_345"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_346"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_347"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_348"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_349"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_350"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_351"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_352"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_353"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_354"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_355"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_356"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_357"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_358"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_359"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_360"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_361"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_362"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_363"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_364"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_365"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_366"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_367"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_368"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_369"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_370"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_371"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_372"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_373"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_374"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_375"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_376"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_377"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_378"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_379"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_380"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_381"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_382"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_383"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_384"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_385"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_386"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_387"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_388"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_389"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_390"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_391"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_392"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_393"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_394"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_395"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_396"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_397"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_398"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_399"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_400"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_401"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_402"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_403"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_404"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_405"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_406"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_407"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_408"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_409"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_410"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_411"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_412"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_413"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_414"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_415"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_416"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_417"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_418"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_419"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_420"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_421"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_422"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_423"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_424"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_425"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_426"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_427"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_428"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_429"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_430"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_431"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_432"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_433"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_434"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_435"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_436"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_437"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_438"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_439"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_440"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_441"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_442"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_443"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_444"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_445"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_446"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_447"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_448"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_449"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_450"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_451"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_452"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_453"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_454"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_455"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_456"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_457"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_458"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_459"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_460"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_461"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_462"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_463"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_464"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_465"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_466"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_467"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_468"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_469"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_470"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_471"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_472"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_473"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_474"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_475"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_476"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_477"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_478"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_479"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_480"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_481"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_482"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_483"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_484"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_485"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_486"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_487"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_488"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_489"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_490"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_491"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_492"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_493"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_494"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_495"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_496"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_497"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_498"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_499"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_500"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_501"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_502"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_503"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_504"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_505"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_506"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_507"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_508"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_509"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_510"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_511"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_512"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_513"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_514"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_515"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_516"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_517"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_518"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_519"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_520"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_521"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_522"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_523"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_524"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_525"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_526"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_527"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_528"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_529"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_530"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_531"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_532"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_533"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_534"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_535"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_536"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_537"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_538"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_539"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_540"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_541"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_542"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_543"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_544"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_545"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_546"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_547"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_548"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_549"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_550"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_551"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_552"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_553"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_554"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_555"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_556"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_557"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_558"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_559"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_560"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_561"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_562"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_563"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_564"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_565"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_566"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_567"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_568"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_569"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_570"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_571"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_572"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_573"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_574"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_575"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_576"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_577"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_578"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_579"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_580"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_581"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_582"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_583"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_584"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_585"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_586"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_587"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_588"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_589"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_590"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_591"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_592"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_593"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_594"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_595"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_596"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_597"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_598"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_599"
+ op: "IsVariableInitialized"
+ input: "output_weights/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_600"
+ op: "IsVariableInitialized"
+ input: "output_weights/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_601"
+ op: "IsVariableInitialized"
+ input: "output_bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/IsVariableInitialized_602"
+ op: "IsVariableInitialized"
+ input: "output_bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/stack"
+ op: "Pack"
+ input: "report_uninitialized_variables/VarIsInitializedOp"
+ input: "report_uninitialized_variables/IsVariableInitialized"
+ input: "report_uninitialized_variables/IsVariableInitialized_1"
+ input: "report_uninitialized_variables/IsVariableInitialized_2"
+ input: "report_uninitialized_variables/IsVariableInitialized_3"
+ input: "report_uninitialized_variables/IsVariableInitialized_4"
+ input: "report_uninitialized_variables/IsVariableInitialized_5"
+ input: "report_uninitialized_variables/IsVariableInitialized_6"
+ input: "report_uninitialized_variables/IsVariableInitialized_7"
+ input: "report_uninitialized_variables/IsVariableInitialized_8"
+ input: "report_uninitialized_variables/IsVariableInitialized_9"
+ input: "report_uninitialized_variables/IsVariableInitialized_10"
+ input: "report_uninitialized_variables/IsVariableInitialized_11"
+ input: "report_uninitialized_variables/IsVariableInitialized_12"
+ input: "report_uninitialized_variables/IsVariableInitialized_13"
+ input: "report_uninitialized_variables/IsVariableInitialized_14"
+ input: "report_uninitialized_variables/IsVariableInitialized_15"
+ input: "report_uninitialized_variables/IsVariableInitialized_16"
+ input: "report_uninitialized_variables/IsVariableInitialized_17"
+ input: "report_uninitialized_variables/IsVariableInitialized_18"
+ input: "report_uninitialized_variables/IsVariableInitialized_19"
+ input: "report_uninitialized_variables/IsVariableInitialized_20"
+ input: "report_uninitialized_variables/IsVariableInitialized_21"
+ input: "report_uninitialized_variables/IsVariableInitialized_22"
+ input: "report_uninitialized_variables/IsVariableInitialized_23"
+ input: "report_uninitialized_variables/IsVariableInitialized_24"
+ input: "report_uninitialized_variables/IsVariableInitialized_25"
+ input: "report_uninitialized_variables/IsVariableInitialized_26"
+ input: "report_uninitialized_variables/IsVariableInitialized_27"
+ input: "report_uninitialized_variables/IsVariableInitialized_28"
+ input: "report_uninitialized_variables/IsVariableInitialized_29"
+ input: "report_uninitialized_variables/IsVariableInitialized_30"
+ input: "report_uninitialized_variables/IsVariableInitialized_31"
+ input: "report_uninitialized_variables/IsVariableInitialized_32"
+ input: "report_uninitialized_variables/IsVariableInitialized_33"
+ input: "report_uninitialized_variables/IsVariableInitialized_34"
+ input: "report_uninitialized_variables/IsVariableInitialized_35"
+ input: "report_uninitialized_variables/IsVariableInitialized_36"
+ input: "report_uninitialized_variables/IsVariableInitialized_37"
+ input: "report_uninitialized_variables/IsVariableInitialized_38"
+ input: "report_uninitialized_variables/IsVariableInitialized_39"
+ input: "report_uninitialized_variables/IsVariableInitialized_40"
+ input: "report_uninitialized_variables/IsVariableInitialized_41"
+ input: "report_uninitialized_variables/IsVariableInitialized_42"
+ input: "report_uninitialized_variables/IsVariableInitialized_43"
+ input: "report_uninitialized_variables/IsVariableInitialized_44"
+ input: "report_uninitialized_variables/IsVariableInitialized_45"
+ input: "report_uninitialized_variables/IsVariableInitialized_46"
+ input: "report_uninitialized_variables/IsVariableInitialized_47"
+ input: "report_uninitialized_variables/IsVariableInitialized_48"
+ input: "report_uninitialized_variables/IsVariableInitialized_49"
+ input: "report_uninitialized_variables/IsVariableInitialized_50"
+ input: "report_uninitialized_variables/IsVariableInitialized_51"
+ input: "report_uninitialized_variables/IsVariableInitialized_52"
+ input: "report_uninitialized_variables/IsVariableInitialized_53"
+ input: "report_uninitialized_variables/IsVariableInitialized_54"
+ input: "report_uninitialized_variables/IsVariableInitialized_55"
+ input: "report_uninitialized_variables/IsVariableInitialized_56"
+ input: "report_uninitialized_variables/IsVariableInitialized_57"
+ input: "report_uninitialized_variables/IsVariableInitialized_58"
+ input: "report_uninitialized_variables/IsVariableInitialized_59"
+ input: "report_uninitialized_variables/IsVariableInitialized_60"
+ input: "report_uninitialized_variables/IsVariableInitialized_61"
+ input: "report_uninitialized_variables/IsVariableInitialized_62"
+ input: "report_uninitialized_variables/IsVariableInitialized_63"
+ input: "report_uninitialized_variables/IsVariableInitialized_64"
+ input: "report_uninitialized_variables/IsVariableInitialized_65"
+ input: "report_uninitialized_variables/IsVariableInitialized_66"
+ input: "report_uninitialized_variables/IsVariableInitialized_67"
+ input: "report_uninitialized_variables/IsVariableInitialized_68"
+ input: "report_uninitialized_variables/IsVariableInitialized_69"
+ input: "report_uninitialized_variables/IsVariableInitialized_70"
+ input: "report_uninitialized_variables/IsVariableInitialized_71"
+ input: "report_uninitialized_variables/IsVariableInitialized_72"
+ input: "report_uninitialized_variables/IsVariableInitialized_73"
+ input: "report_uninitialized_variables/IsVariableInitialized_74"
+ input: "report_uninitialized_variables/IsVariableInitialized_75"
+ input: "report_uninitialized_variables/IsVariableInitialized_76"
+ input: "report_uninitialized_variables/IsVariableInitialized_77"
+ input: "report_uninitialized_variables/IsVariableInitialized_78"
+ input: "report_uninitialized_variables/IsVariableInitialized_79"
+ input: "report_uninitialized_variables/IsVariableInitialized_80"
+ input: "report_uninitialized_variables/IsVariableInitialized_81"
+ input: "report_uninitialized_variables/IsVariableInitialized_82"
+ input: "report_uninitialized_variables/IsVariableInitialized_83"
+ input: "report_uninitialized_variables/IsVariableInitialized_84"
+ input: "report_uninitialized_variables/IsVariableInitialized_85"
+ input: "report_uninitialized_variables/IsVariableInitialized_86"
+ input: "report_uninitialized_variables/IsVariableInitialized_87"
+ input: "report_uninitialized_variables/IsVariableInitialized_88"
+ input: "report_uninitialized_variables/IsVariableInitialized_89"
+ input: "report_uninitialized_variables/IsVariableInitialized_90"
+ input: "report_uninitialized_variables/IsVariableInitialized_91"
+ input: "report_uninitialized_variables/IsVariableInitialized_92"
+ input: "report_uninitialized_variables/IsVariableInitialized_93"
+ input: "report_uninitialized_variables/IsVariableInitialized_94"
+ input: "report_uninitialized_variables/IsVariableInitialized_95"
+ input: "report_uninitialized_variables/IsVariableInitialized_96"
+ input: "report_uninitialized_variables/IsVariableInitialized_97"
+ input: "report_uninitialized_variables/IsVariableInitialized_98"
+ input: "report_uninitialized_variables/IsVariableInitialized_99"
+ input: "report_uninitialized_variables/IsVariableInitialized_100"
+ input: "report_uninitialized_variables/IsVariableInitialized_101"
+ input: "report_uninitialized_variables/IsVariableInitialized_102"
+ input: "report_uninitialized_variables/IsVariableInitialized_103"
+ input: "report_uninitialized_variables/IsVariableInitialized_104"
+ input: "report_uninitialized_variables/IsVariableInitialized_105"
+ input: "report_uninitialized_variables/IsVariableInitialized_106"
+ input: "report_uninitialized_variables/IsVariableInitialized_107"
+ input: "report_uninitialized_variables/IsVariableInitialized_108"
+ input: "report_uninitialized_variables/IsVariableInitialized_109"
+ input: "report_uninitialized_variables/IsVariableInitialized_110"
+ input: "report_uninitialized_variables/IsVariableInitialized_111"
+ input: "report_uninitialized_variables/IsVariableInitialized_112"
+ input: "report_uninitialized_variables/IsVariableInitialized_113"
+ input: "report_uninitialized_variables/IsVariableInitialized_114"
+ input: "report_uninitialized_variables/IsVariableInitialized_115"
+ input: "report_uninitialized_variables/IsVariableInitialized_116"
+ input: "report_uninitialized_variables/IsVariableInitialized_117"
+ input: "report_uninitialized_variables/IsVariableInitialized_118"
+ input: "report_uninitialized_variables/IsVariableInitialized_119"
+ input: "report_uninitialized_variables/IsVariableInitialized_120"
+ input: "report_uninitialized_variables/IsVariableInitialized_121"
+ input: "report_uninitialized_variables/IsVariableInitialized_122"
+ input: "report_uninitialized_variables/IsVariableInitialized_123"
+ input: "report_uninitialized_variables/IsVariableInitialized_124"
+ input: "report_uninitialized_variables/IsVariableInitialized_125"
+ input: "report_uninitialized_variables/IsVariableInitialized_126"
+ input: "report_uninitialized_variables/IsVariableInitialized_127"
+ input: "report_uninitialized_variables/IsVariableInitialized_128"
+ input: "report_uninitialized_variables/IsVariableInitialized_129"
+ input: "report_uninitialized_variables/IsVariableInitialized_130"
+ input: "report_uninitialized_variables/IsVariableInitialized_131"
+ input: "report_uninitialized_variables/IsVariableInitialized_132"
+ input: "report_uninitialized_variables/IsVariableInitialized_133"
+ input: "report_uninitialized_variables/IsVariableInitialized_134"
+ input: "report_uninitialized_variables/IsVariableInitialized_135"
+ input: "report_uninitialized_variables/IsVariableInitialized_136"
+ input: "report_uninitialized_variables/IsVariableInitialized_137"
+ input: "report_uninitialized_variables/IsVariableInitialized_138"
+ input: "report_uninitialized_variables/IsVariableInitialized_139"
+ input: "report_uninitialized_variables/IsVariableInitialized_140"
+ input: "report_uninitialized_variables/IsVariableInitialized_141"
+ input: "report_uninitialized_variables/IsVariableInitialized_142"
+ input: "report_uninitialized_variables/IsVariableInitialized_143"
+ input: "report_uninitialized_variables/IsVariableInitialized_144"
+ input: "report_uninitialized_variables/IsVariableInitialized_145"
+ input: "report_uninitialized_variables/IsVariableInitialized_146"
+ input: "report_uninitialized_variables/IsVariableInitialized_147"
+ input: "report_uninitialized_variables/IsVariableInitialized_148"
+ input: "report_uninitialized_variables/IsVariableInitialized_149"
+ input: "report_uninitialized_variables/IsVariableInitialized_150"
+ input: "report_uninitialized_variables/IsVariableInitialized_151"
+ input: "report_uninitialized_variables/IsVariableInitialized_152"
+ input: "report_uninitialized_variables/IsVariableInitialized_153"
+ input: "report_uninitialized_variables/IsVariableInitialized_154"
+ input: "report_uninitialized_variables/IsVariableInitialized_155"
+ input: "report_uninitialized_variables/IsVariableInitialized_156"
+ input: "report_uninitialized_variables/IsVariableInitialized_157"
+ input: "report_uninitialized_variables/IsVariableInitialized_158"
+ input: "report_uninitialized_variables/IsVariableInitialized_159"
+ input: "report_uninitialized_variables/IsVariableInitialized_160"
+ input: "report_uninitialized_variables/IsVariableInitialized_161"
+ input: "report_uninitialized_variables/IsVariableInitialized_162"
+ input: "report_uninitialized_variables/IsVariableInitialized_163"
+ input: "report_uninitialized_variables/IsVariableInitialized_164"
+ input: "report_uninitialized_variables/IsVariableInitialized_165"
+ input: "report_uninitialized_variables/IsVariableInitialized_166"
+ input: "report_uninitialized_variables/IsVariableInitialized_167"
+ input: "report_uninitialized_variables/IsVariableInitialized_168"
+ input: "report_uninitialized_variables/IsVariableInitialized_169"
+ input: "report_uninitialized_variables/IsVariableInitialized_170"
+ input: "report_uninitialized_variables/IsVariableInitialized_171"
+ input: "report_uninitialized_variables/IsVariableInitialized_172"
+ input: "report_uninitialized_variables/IsVariableInitialized_173"
+ input: "report_uninitialized_variables/IsVariableInitialized_174"
+ input: "report_uninitialized_variables/IsVariableInitialized_175"
+ input: "report_uninitialized_variables/IsVariableInitialized_176"
+ input: "report_uninitialized_variables/IsVariableInitialized_177"
+ input: "report_uninitialized_variables/IsVariableInitialized_178"
+ input: "report_uninitialized_variables/IsVariableInitialized_179"
+ input: "report_uninitialized_variables/IsVariableInitialized_180"
+ input: "report_uninitialized_variables/IsVariableInitialized_181"
+ input: "report_uninitialized_variables/IsVariableInitialized_182"
+ input: "report_uninitialized_variables/IsVariableInitialized_183"
+ input: "report_uninitialized_variables/IsVariableInitialized_184"
+ input: "report_uninitialized_variables/IsVariableInitialized_185"
+ input: "report_uninitialized_variables/IsVariableInitialized_186"
+ input: "report_uninitialized_variables/IsVariableInitialized_187"
+ input: "report_uninitialized_variables/IsVariableInitialized_188"
+ input: "report_uninitialized_variables/IsVariableInitialized_189"
+ input: "report_uninitialized_variables/IsVariableInitialized_190"
+ input: "report_uninitialized_variables/IsVariableInitialized_191"
+ input: "report_uninitialized_variables/IsVariableInitialized_192"
+ input: "report_uninitialized_variables/IsVariableInitialized_193"
+ input: "report_uninitialized_variables/IsVariableInitialized_194"
+ input: "report_uninitialized_variables/IsVariableInitialized_195"
+ input: "report_uninitialized_variables/IsVariableInitialized_196"
+ input: "report_uninitialized_variables/IsVariableInitialized_197"
+ input: "report_uninitialized_variables/IsVariableInitialized_198"
+ input: "report_uninitialized_variables/IsVariableInitialized_199"
+ input: "report_uninitialized_variables/IsVariableInitialized_200"
+ input: "report_uninitialized_variables/IsVariableInitialized_201"
+ input: "report_uninitialized_variables/IsVariableInitialized_202"
+ input: "report_uninitialized_variables/IsVariableInitialized_203"
+ input: "report_uninitialized_variables/IsVariableInitialized_204"
+ input: "report_uninitialized_variables/IsVariableInitialized_205"
+ input: "report_uninitialized_variables/IsVariableInitialized_206"
+ input: "report_uninitialized_variables/IsVariableInitialized_207"
+ input: "report_uninitialized_variables/IsVariableInitialized_208"
+ input: "report_uninitialized_variables/IsVariableInitialized_209"
+ input: "report_uninitialized_variables/IsVariableInitialized_210"
+ input: "report_uninitialized_variables/IsVariableInitialized_211"
+ input: "report_uninitialized_variables/IsVariableInitialized_212"
+ input: "report_uninitialized_variables/IsVariableInitialized_213"
+ input: "report_uninitialized_variables/IsVariableInitialized_214"
+ input: "report_uninitialized_variables/IsVariableInitialized_215"
+ input: "report_uninitialized_variables/IsVariableInitialized_216"
+ input: "report_uninitialized_variables/IsVariableInitialized_217"
+ input: "report_uninitialized_variables/IsVariableInitialized_218"
+ input: "report_uninitialized_variables/IsVariableInitialized_219"
+ input: "report_uninitialized_variables/IsVariableInitialized_220"
+ input: "report_uninitialized_variables/IsVariableInitialized_221"
+ input: "report_uninitialized_variables/IsVariableInitialized_222"
+ input: "report_uninitialized_variables/IsVariableInitialized_223"
+ input: "report_uninitialized_variables/IsVariableInitialized_224"
+ input: "report_uninitialized_variables/IsVariableInitialized_225"
+ input: "report_uninitialized_variables/IsVariableInitialized_226"
+ input: "report_uninitialized_variables/IsVariableInitialized_227"
+ input: "report_uninitialized_variables/IsVariableInitialized_228"
+ input: "report_uninitialized_variables/IsVariableInitialized_229"
+ input: "report_uninitialized_variables/IsVariableInitialized_230"
+ input: "report_uninitialized_variables/IsVariableInitialized_231"
+ input: "report_uninitialized_variables/IsVariableInitialized_232"
+ input: "report_uninitialized_variables/IsVariableInitialized_233"
+ input: "report_uninitialized_variables/IsVariableInitialized_234"
+ input: "report_uninitialized_variables/IsVariableInitialized_235"
+ input: "report_uninitialized_variables/IsVariableInitialized_236"
+ input: "report_uninitialized_variables/IsVariableInitialized_237"
+ input: "report_uninitialized_variables/IsVariableInitialized_238"
+ input: "report_uninitialized_variables/IsVariableInitialized_239"
+ input: "report_uninitialized_variables/IsVariableInitialized_240"
+ input: "report_uninitialized_variables/IsVariableInitialized_241"
+ input: "report_uninitialized_variables/IsVariableInitialized_242"
+ input: "report_uninitialized_variables/IsVariableInitialized_243"
+ input: "report_uninitialized_variables/IsVariableInitialized_244"
+ input: "report_uninitialized_variables/IsVariableInitialized_245"
+ input: "report_uninitialized_variables/IsVariableInitialized_246"
+ input: "report_uninitialized_variables/IsVariableInitialized_247"
+ input: "report_uninitialized_variables/IsVariableInitialized_248"
+ input: "report_uninitialized_variables/IsVariableInitialized_249"
+ input: "report_uninitialized_variables/IsVariableInitialized_250"
+ input: "report_uninitialized_variables/IsVariableInitialized_251"
+ input: "report_uninitialized_variables/IsVariableInitialized_252"
+ input: "report_uninitialized_variables/IsVariableInitialized_253"
+ input: "report_uninitialized_variables/IsVariableInitialized_254"
+ input: "report_uninitialized_variables/IsVariableInitialized_255"
+ input: "report_uninitialized_variables/IsVariableInitialized_256"
+ input: "report_uninitialized_variables/IsVariableInitialized_257"
+ input: "report_uninitialized_variables/IsVariableInitialized_258"
+ input: "report_uninitialized_variables/IsVariableInitialized_259"
+ input: "report_uninitialized_variables/IsVariableInitialized_260"
+ input: "report_uninitialized_variables/IsVariableInitialized_261"
+ input: "report_uninitialized_variables/IsVariableInitialized_262"
+ input: "report_uninitialized_variables/IsVariableInitialized_263"
+ input: "report_uninitialized_variables/IsVariableInitialized_264"
+ input: "report_uninitialized_variables/IsVariableInitialized_265"
+ input: "report_uninitialized_variables/IsVariableInitialized_266"
+ input: "report_uninitialized_variables/IsVariableInitialized_267"
+ input: "report_uninitialized_variables/IsVariableInitialized_268"
+ input: "report_uninitialized_variables/IsVariableInitialized_269"
+ input: "report_uninitialized_variables/IsVariableInitialized_270"
+ input: "report_uninitialized_variables/IsVariableInitialized_271"
+ input: "report_uninitialized_variables/IsVariableInitialized_272"
+ input: "report_uninitialized_variables/IsVariableInitialized_273"
+ input: "report_uninitialized_variables/IsVariableInitialized_274"
+ input: "report_uninitialized_variables/IsVariableInitialized_275"
+ input: "report_uninitialized_variables/IsVariableInitialized_276"
+ input: "report_uninitialized_variables/IsVariableInitialized_277"
+ input: "report_uninitialized_variables/IsVariableInitialized_278"
+ input: "report_uninitialized_variables/IsVariableInitialized_279"
+ input: "report_uninitialized_variables/IsVariableInitialized_280"
+ input: "report_uninitialized_variables/IsVariableInitialized_281"
+ input: "report_uninitialized_variables/IsVariableInitialized_282"
+ input: "report_uninitialized_variables/IsVariableInitialized_283"
+ input: "report_uninitialized_variables/IsVariableInitialized_284"
+ input: "report_uninitialized_variables/IsVariableInitialized_285"
+ input: "report_uninitialized_variables/IsVariableInitialized_286"
+ input: "report_uninitialized_variables/IsVariableInitialized_287"
+ input: "report_uninitialized_variables/IsVariableInitialized_288"
+ input: "report_uninitialized_variables/IsVariableInitialized_289"
+ input: "report_uninitialized_variables/IsVariableInitialized_290"
+ input: "report_uninitialized_variables/IsVariableInitialized_291"
+ input: "report_uninitialized_variables/IsVariableInitialized_292"
+ input: "report_uninitialized_variables/IsVariableInitialized_293"
+ input: "report_uninitialized_variables/IsVariableInitialized_294"
+ input: "report_uninitialized_variables/IsVariableInitialized_295"
+ input: "report_uninitialized_variables/IsVariableInitialized_296"
+ input: "report_uninitialized_variables/IsVariableInitialized_297"
+ input: "report_uninitialized_variables/IsVariableInitialized_298"
+ input: "report_uninitialized_variables/IsVariableInitialized_299"
+ input: "report_uninitialized_variables/IsVariableInitialized_300"
+ input: "report_uninitialized_variables/IsVariableInitialized_301"
+ input: "report_uninitialized_variables/IsVariableInitialized_302"
+ input: "report_uninitialized_variables/IsVariableInitialized_303"
+ input: "report_uninitialized_variables/IsVariableInitialized_304"
+ input: "report_uninitialized_variables/IsVariableInitialized_305"
+ input: "report_uninitialized_variables/IsVariableInitialized_306"
+ input: "report_uninitialized_variables/IsVariableInitialized_307"
+ input: "report_uninitialized_variables/IsVariableInitialized_308"
+ input: "report_uninitialized_variables/IsVariableInitialized_309"
+ input: "report_uninitialized_variables/IsVariableInitialized_310"
+ input: "report_uninitialized_variables/IsVariableInitialized_311"
+ input: "report_uninitialized_variables/IsVariableInitialized_312"
+ input: "report_uninitialized_variables/IsVariableInitialized_313"
+ input: "report_uninitialized_variables/IsVariableInitialized_314"
+ input: "report_uninitialized_variables/IsVariableInitialized_315"
+ input: "report_uninitialized_variables/IsVariableInitialized_316"
+ input: "report_uninitialized_variables/IsVariableInitialized_317"
+ input: "report_uninitialized_variables/IsVariableInitialized_318"
+ input: "report_uninitialized_variables/IsVariableInitialized_319"
+ input: "report_uninitialized_variables/IsVariableInitialized_320"
+ input: "report_uninitialized_variables/IsVariableInitialized_321"
+ input: "report_uninitialized_variables/IsVariableInitialized_322"
+ input: "report_uninitialized_variables/IsVariableInitialized_323"
+ input: "report_uninitialized_variables/IsVariableInitialized_324"
+ input: "report_uninitialized_variables/IsVariableInitialized_325"
+ input: "report_uninitialized_variables/IsVariableInitialized_326"
+ input: "report_uninitialized_variables/IsVariableInitialized_327"
+ input: "report_uninitialized_variables/IsVariableInitialized_328"
+ input: "report_uninitialized_variables/IsVariableInitialized_329"
+ input: "report_uninitialized_variables/IsVariableInitialized_330"
+ input: "report_uninitialized_variables/IsVariableInitialized_331"
+ input: "report_uninitialized_variables/IsVariableInitialized_332"
+ input: "report_uninitialized_variables/IsVariableInitialized_333"
+ input: "report_uninitialized_variables/IsVariableInitialized_334"
+ input: "report_uninitialized_variables/IsVariableInitialized_335"
+ input: "report_uninitialized_variables/IsVariableInitialized_336"
+ input: "report_uninitialized_variables/IsVariableInitialized_337"
+ input: "report_uninitialized_variables/IsVariableInitialized_338"
+ input: "report_uninitialized_variables/IsVariableInitialized_339"
+ input: "report_uninitialized_variables/IsVariableInitialized_340"
+ input: "report_uninitialized_variables/IsVariableInitialized_341"
+ input: "report_uninitialized_variables/IsVariableInitialized_342"
+ input: "report_uninitialized_variables/IsVariableInitialized_343"
+ input: "report_uninitialized_variables/IsVariableInitialized_344"
+ input: "report_uninitialized_variables/IsVariableInitialized_345"
+ input: "report_uninitialized_variables/IsVariableInitialized_346"
+ input: "report_uninitialized_variables/IsVariableInitialized_347"
+ input: "report_uninitialized_variables/IsVariableInitialized_348"
+ input: "report_uninitialized_variables/IsVariableInitialized_349"
+ input: "report_uninitialized_variables/IsVariableInitialized_350"
+ input: "report_uninitialized_variables/IsVariableInitialized_351"
+ input: "report_uninitialized_variables/IsVariableInitialized_352"
+ input: "report_uninitialized_variables/IsVariableInitialized_353"
+ input: "report_uninitialized_variables/IsVariableInitialized_354"
+ input: "report_uninitialized_variables/IsVariableInitialized_355"
+ input: "report_uninitialized_variables/IsVariableInitialized_356"
+ input: "report_uninitialized_variables/IsVariableInitialized_357"
+ input: "report_uninitialized_variables/IsVariableInitialized_358"
+ input: "report_uninitialized_variables/IsVariableInitialized_359"
+ input: "report_uninitialized_variables/IsVariableInitialized_360"
+ input: "report_uninitialized_variables/IsVariableInitialized_361"
+ input: "report_uninitialized_variables/IsVariableInitialized_362"
+ input: "report_uninitialized_variables/IsVariableInitialized_363"
+ input: "report_uninitialized_variables/IsVariableInitialized_364"
+ input: "report_uninitialized_variables/IsVariableInitialized_365"
+ input: "report_uninitialized_variables/IsVariableInitialized_366"
+ input: "report_uninitialized_variables/IsVariableInitialized_367"
+ input: "report_uninitialized_variables/IsVariableInitialized_368"
+ input: "report_uninitialized_variables/IsVariableInitialized_369"
+ input: "report_uninitialized_variables/IsVariableInitialized_370"
+ input: "report_uninitialized_variables/IsVariableInitialized_371"
+ input: "report_uninitialized_variables/IsVariableInitialized_372"
+ input: "report_uninitialized_variables/IsVariableInitialized_373"
+ input: "report_uninitialized_variables/IsVariableInitialized_374"
+ input: "report_uninitialized_variables/IsVariableInitialized_375"
+ input: "report_uninitialized_variables/IsVariableInitialized_376"
+ input: "report_uninitialized_variables/IsVariableInitialized_377"
+ input: "report_uninitialized_variables/IsVariableInitialized_378"
+ input: "report_uninitialized_variables/IsVariableInitialized_379"
+ input: "report_uninitialized_variables/IsVariableInitialized_380"
+ input: "report_uninitialized_variables/IsVariableInitialized_381"
+ input: "report_uninitialized_variables/IsVariableInitialized_382"
+ input: "report_uninitialized_variables/IsVariableInitialized_383"
+ input: "report_uninitialized_variables/IsVariableInitialized_384"
+ input: "report_uninitialized_variables/IsVariableInitialized_385"
+ input: "report_uninitialized_variables/IsVariableInitialized_386"
+ input: "report_uninitialized_variables/IsVariableInitialized_387"
+ input: "report_uninitialized_variables/IsVariableInitialized_388"
+ input: "report_uninitialized_variables/IsVariableInitialized_389"
+ input: "report_uninitialized_variables/IsVariableInitialized_390"
+ input: "report_uninitialized_variables/IsVariableInitialized_391"
+ input: "report_uninitialized_variables/IsVariableInitialized_392"
+ input: "report_uninitialized_variables/IsVariableInitialized_393"
+ input: "report_uninitialized_variables/IsVariableInitialized_394"
+ input: "report_uninitialized_variables/IsVariableInitialized_395"
+ input: "report_uninitialized_variables/IsVariableInitialized_396"
+ input: "report_uninitialized_variables/IsVariableInitialized_397"
+ input: "report_uninitialized_variables/IsVariableInitialized_398"
+ input: "report_uninitialized_variables/IsVariableInitialized_399"
+ input: "report_uninitialized_variables/IsVariableInitialized_400"
+ input: "report_uninitialized_variables/IsVariableInitialized_401"
+ input: "report_uninitialized_variables/IsVariableInitialized_402"
+ input: "report_uninitialized_variables/IsVariableInitialized_403"
+ input: "report_uninitialized_variables/IsVariableInitialized_404"
+ input: "report_uninitialized_variables/IsVariableInitialized_405"
+ input: "report_uninitialized_variables/IsVariableInitialized_406"
+ input: "report_uninitialized_variables/IsVariableInitialized_407"
+ input: "report_uninitialized_variables/IsVariableInitialized_408"
+ input: "report_uninitialized_variables/IsVariableInitialized_409"
+ input: "report_uninitialized_variables/IsVariableInitialized_410"
+ input: "report_uninitialized_variables/IsVariableInitialized_411"
+ input: "report_uninitialized_variables/IsVariableInitialized_412"
+ input: "report_uninitialized_variables/IsVariableInitialized_413"
+ input: "report_uninitialized_variables/IsVariableInitialized_414"
+ input: "report_uninitialized_variables/IsVariableInitialized_415"
+ input: "report_uninitialized_variables/IsVariableInitialized_416"
+ input: "report_uninitialized_variables/IsVariableInitialized_417"
+ input: "report_uninitialized_variables/IsVariableInitialized_418"
+ input: "report_uninitialized_variables/IsVariableInitialized_419"
+ input: "report_uninitialized_variables/IsVariableInitialized_420"
+ input: "report_uninitialized_variables/IsVariableInitialized_421"
+ input: "report_uninitialized_variables/IsVariableInitialized_422"
+ input: "report_uninitialized_variables/IsVariableInitialized_423"
+ input: "report_uninitialized_variables/IsVariableInitialized_424"
+ input: "report_uninitialized_variables/IsVariableInitialized_425"
+ input: "report_uninitialized_variables/IsVariableInitialized_426"
+ input: "report_uninitialized_variables/IsVariableInitialized_427"
+ input: "report_uninitialized_variables/IsVariableInitialized_428"
+ input: "report_uninitialized_variables/IsVariableInitialized_429"
+ input: "report_uninitialized_variables/IsVariableInitialized_430"
+ input: "report_uninitialized_variables/IsVariableInitialized_431"
+ input: "report_uninitialized_variables/IsVariableInitialized_432"
+ input: "report_uninitialized_variables/IsVariableInitialized_433"
+ input: "report_uninitialized_variables/IsVariableInitialized_434"
+ input: "report_uninitialized_variables/IsVariableInitialized_435"
+ input: "report_uninitialized_variables/IsVariableInitialized_436"
+ input: "report_uninitialized_variables/IsVariableInitialized_437"
+ input: "report_uninitialized_variables/IsVariableInitialized_438"
+ input: "report_uninitialized_variables/IsVariableInitialized_439"
+ input: "report_uninitialized_variables/IsVariableInitialized_440"
+ input: "report_uninitialized_variables/IsVariableInitialized_441"
+ input: "report_uninitialized_variables/IsVariableInitialized_442"
+ input: "report_uninitialized_variables/IsVariableInitialized_443"
+ input: "report_uninitialized_variables/IsVariableInitialized_444"
+ input: "report_uninitialized_variables/IsVariableInitialized_445"
+ input: "report_uninitialized_variables/IsVariableInitialized_446"
+ input: "report_uninitialized_variables/IsVariableInitialized_447"
+ input: "report_uninitialized_variables/IsVariableInitialized_448"
+ input: "report_uninitialized_variables/IsVariableInitialized_449"
+ input: "report_uninitialized_variables/IsVariableInitialized_450"
+ input: "report_uninitialized_variables/IsVariableInitialized_451"
+ input: "report_uninitialized_variables/IsVariableInitialized_452"
+ input: "report_uninitialized_variables/IsVariableInitialized_453"
+ input: "report_uninitialized_variables/IsVariableInitialized_454"
+ input: "report_uninitialized_variables/IsVariableInitialized_455"
+ input: "report_uninitialized_variables/IsVariableInitialized_456"
+ input: "report_uninitialized_variables/IsVariableInitialized_457"
+ input: "report_uninitialized_variables/IsVariableInitialized_458"
+ input: "report_uninitialized_variables/IsVariableInitialized_459"
+ input: "report_uninitialized_variables/IsVariableInitialized_460"
+ input: "report_uninitialized_variables/IsVariableInitialized_461"
+ input: "report_uninitialized_variables/IsVariableInitialized_462"
+ input: "report_uninitialized_variables/IsVariableInitialized_463"
+ input: "report_uninitialized_variables/IsVariableInitialized_464"
+ input: "report_uninitialized_variables/IsVariableInitialized_465"
+ input: "report_uninitialized_variables/IsVariableInitialized_466"
+ input: "report_uninitialized_variables/IsVariableInitialized_467"
+ input: "report_uninitialized_variables/IsVariableInitialized_468"
+ input: "report_uninitialized_variables/IsVariableInitialized_469"
+ input: "report_uninitialized_variables/IsVariableInitialized_470"
+ input: "report_uninitialized_variables/IsVariableInitialized_471"
+ input: "report_uninitialized_variables/IsVariableInitialized_472"
+ input: "report_uninitialized_variables/IsVariableInitialized_473"
+ input: "report_uninitialized_variables/IsVariableInitialized_474"
+ input: "report_uninitialized_variables/IsVariableInitialized_475"
+ input: "report_uninitialized_variables/IsVariableInitialized_476"
+ input: "report_uninitialized_variables/IsVariableInitialized_477"
+ input: "report_uninitialized_variables/IsVariableInitialized_478"
+ input: "report_uninitialized_variables/IsVariableInitialized_479"
+ input: "report_uninitialized_variables/IsVariableInitialized_480"
+ input: "report_uninitialized_variables/IsVariableInitialized_481"
+ input: "report_uninitialized_variables/IsVariableInitialized_482"
+ input: "report_uninitialized_variables/IsVariableInitialized_483"
+ input: "report_uninitialized_variables/IsVariableInitialized_484"
+ input: "report_uninitialized_variables/IsVariableInitialized_485"
+ input: "report_uninitialized_variables/IsVariableInitialized_486"
+ input: "report_uninitialized_variables/IsVariableInitialized_487"
+ input: "report_uninitialized_variables/IsVariableInitialized_488"
+ input: "report_uninitialized_variables/IsVariableInitialized_489"
+ input: "report_uninitialized_variables/IsVariableInitialized_490"
+ input: "report_uninitialized_variables/IsVariableInitialized_491"
+ input: "report_uninitialized_variables/IsVariableInitialized_492"
+ input: "report_uninitialized_variables/IsVariableInitialized_493"
+ input: "report_uninitialized_variables/IsVariableInitialized_494"
+ input: "report_uninitialized_variables/IsVariableInitialized_495"
+ input: "report_uninitialized_variables/IsVariableInitialized_496"
+ input: "report_uninitialized_variables/IsVariableInitialized_497"
+ input: "report_uninitialized_variables/IsVariableInitialized_498"
+ input: "report_uninitialized_variables/IsVariableInitialized_499"
+ input: "report_uninitialized_variables/IsVariableInitialized_500"
+ input: "report_uninitialized_variables/IsVariableInitialized_501"
+ input: "report_uninitialized_variables/IsVariableInitialized_502"
+ input: "report_uninitialized_variables/IsVariableInitialized_503"
+ input: "report_uninitialized_variables/IsVariableInitialized_504"
+ input: "report_uninitialized_variables/IsVariableInitialized_505"
+ input: "report_uninitialized_variables/IsVariableInitialized_506"
+ input: "report_uninitialized_variables/IsVariableInitialized_507"
+ input: "report_uninitialized_variables/IsVariableInitialized_508"
+ input: "report_uninitialized_variables/IsVariableInitialized_509"
+ input: "report_uninitialized_variables/IsVariableInitialized_510"
+ input: "report_uninitialized_variables/IsVariableInitialized_511"
+ input: "report_uninitialized_variables/IsVariableInitialized_512"
+ input: "report_uninitialized_variables/IsVariableInitialized_513"
+ input: "report_uninitialized_variables/IsVariableInitialized_514"
+ input: "report_uninitialized_variables/IsVariableInitialized_515"
+ input: "report_uninitialized_variables/IsVariableInitialized_516"
+ input: "report_uninitialized_variables/IsVariableInitialized_517"
+ input: "report_uninitialized_variables/IsVariableInitialized_518"
+ input: "report_uninitialized_variables/IsVariableInitialized_519"
+ input: "report_uninitialized_variables/IsVariableInitialized_520"
+ input: "report_uninitialized_variables/IsVariableInitialized_521"
+ input: "report_uninitialized_variables/IsVariableInitialized_522"
+ input: "report_uninitialized_variables/IsVariableInitialized_523"
+ input: "report_uninitialized_variables/IsVariableInitialized_524"
+ input: "report_uninitialized_variables/IsVariableInitialized_525"
+ input: "report_uninitialized_variables/IsVariableInitialized_526"
+ input: "report_uninitialized_variables/IsVariableInitialized_527"
+ input: "report_uninitialized_variables/IsVariableInitialized_528"
+ input: "report_uninitialized_variables/IsVariableInitialized_529"
+ input: "report_uninitialized_variables/IsVariableInitialized_530"
+ input: "report_uninitialized_variables/IsVariableInitialized_531"
+ input: "report_uninitialized_variables/IsVariableInitialized_532"
+ input: "report_uninitialized_variables/IsVariableInitialized_533"
+ input: "report_uninitialized_variables/IsVariableInitialized_534"
+ input: "report_uninitialized_variables/IsVariableInitialized_535"
+ input: "report_uninitialized_variables/IsVariableInitialized_536"
+ input: "report_uninitialized_variables/IsVariableInitialized_537"
+ input: "report_uninitialized_variables/IsVariableInitialized_538"
+ input: "report_uninitialized_variables/IsVariableInitialized_539"
+ input: "report_uninitialized_variables/IsVariableInitialized_540"
+ input: "report_uninitialized_variables/IsVariableInitialized_541"
+ input: "report_uninitialized_variables/IsVariableInitialized_542"
+ input: "report_uninitialized_variables/IsVariableInitialized_543"
+ input: "report_uninitialized_variables/IsVariableInitialized_544"
+ input: "report_uninitialized_variables/IsVariableInitialized_545"
+ input: "report_uninitialized_variables/IsVariableInitialized_546"
+ input: "report_uninitialized_variables/IsVariableInitialized_547"
+ input: "report_uninitialized_variables/IsVariableInitialized_548"
+ input: "report_uninitialized_variables/IsVariableInitialized_549"
+ input: "report_uninitialized_variables/IsVariableInitialized_550"
+ input: "report_uninitialized_variables/IsVariableInitialized_551"
+ input: "report_uninitialized_variables/IsVariableInitialized_552"
+ input: "report_uninitialized_variables/IsVariableInitialized_553"
+ input: "report_uninitialized_variables/IsVariableInitialized_554"
+ input: "report_uninitialized_variables/IsVariableInitialized_555"
+ input: "report_uninitialized_variables/IsVariableInitialized_556"
+ input: "report_uninitialized_variables/IsVariableInitialized_557"
+ input: "report_uninitialized_variables/IsVariableInitialized_558"
+ input: "report_uninitialized_variables/IsVariableInitialized_559"
+ input: "report_uninitialized_variables/IsVariableInitialized_560"
+ input: "report_uninitialized_variables/IsVariableInitialized_561"
+ input: "report_uninitialized_variables/IsVariableInitialized_562"
+ input: "report_uninitialized_variables/IsVariableInitialized_563"
+ input: "report_uninitialized_variables/IsVariableInitialized_564"
+ input: "report_uninitialized_variables/IsVariableInitialized_565"
+ input: "report_uninitialized_variables/IsVariableInitialized_566"
+ input: "report_uninitialized_variables/IsVariableInitialized_567"
+ input: "report_uninitialized_variables/IsVariableInitialized_568"
+ input: "report_uninitialized_variables/IsVariableInitialized_569"
+ input: "report_uninitialized_variables/IsVariableInitialized_570"
+ input: "report_uninitialized_variables/IsVariableInitialized_571"
+ input: "report_uninitialized_variables/IsVariableInitialized_572"
+ input: "report_uninitialized_variables/IsVariableInitialized_573"
+ input: "report_uninitialized_variables/IsVariableInitialized_574"
+ input: "report_uninitialized_variables/IsVariableInitialized_575"
+ input: "report_uninitialized_variables/IsVariableInitialized_576"
+ input: "report_uninitialized_variables/IsVariableInitialized_577"
+ input: "report_uninitialized_variables/IsVariableInitialized_578"
+ input: "report_uninitialized_variables/IsVariableInitialized_579"
+ input: "report_uninitialized_variables/IsVariableInitialized_580"
+ input: "report_uninitialized_variables/IsVariableInitialized_581"
+ input: "report_uninitialized_variables/IsVariableInitialized_582"
+ input: "report_uninitialized_variables/IsVariableInitialized_583"
+ input: "report_uninitialized_variables/IsVariableInitialized_584"
+ input: "report_uninitialized_variables/IsVariableInitialized_585"
+ input: "report_uninitialized_variables/IsVariableInitialized_586"
+ input: "report_uninitialized_variables/IsVariableInitialized_587"
+ input: "report_uninitialized_variables/IsVariableInitialized_588"
+ input: "report_uninitialized_variables/IsVariableInitialized_589"
+ input: "report_uninitialized_variables/IsVariableInitialized_590"
+ input: "report_uninitialized_variables/IsVariableInitialized_591"
+ input: "report_uninitialized_variables/IsVariableInitialized_592"
+ input: "report_uninitialized_variables/IsVariableInitialized_593"
+ input: "report_uninitialized_variables/IsVariableInitialized_594"
+ input: "report_uninitialized_variables/IsVariableInitialized_595"
+ input: "report_uninitialized_variables/IsVariableInitialized_596"
+ input: "report_uninitialized_variables/IsVariableInitialized_597"
+ input: "report_uninitialized_variables/IsVariableInitialized_598"
+ input: "report_uninitialized_variables/IsVariableInitialized_599"
+ input: "report_uninitialized_variables/IsVariableInitialized_600"
+ input: "report_uninitialized_variables/IsVariableInitialized_601"
+ input: "report_uninitialized_variables/IsVariableInitialized_602"
+ device: "/device:CPU:0"
+ attr {
+ key: "N"
+ value {
+ i: 604
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/LogicalNot"
+ op: "LogicalNot"
+ input: "report_uninitialized_variables/stack"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/Const"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 604
+ }
+ }
+ string_val: "global_step"
+ string_val: "bert/embeddings/word_embeddings"
+ string_val: "bert/embeddings/token_type_embeddings"
+ string_val: "bert/embeddings/position_embeddings"
+ string_val: "bert/embeddings/LayerNorm/beta"
+ string_val: "bert/embeddings/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_0/output/dense/kernel"
+ string_val: "bert/encoder/layer_0/output/dense/bias"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_1/output/dense/kernel"
+ string_val: "bert/encoder/layer_1/output/dense/bias"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_2/output/dense/kernel"
+ string_val: "bert/encoder/layer_2/output/dense/bias"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_3/output/dense/kernel"
+ string_val: "bert/encoder/layer_3/output/dense/bias"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_4/output/dense/kernel"
+ string_val: "bert/encoder/layer_4/output/dense/bias"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_5/output/dense/kernel"
+ string_val: "bert/encoder/layer_5/output/dense/bias"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_6/output/dense/kernel"
+ string_val: "bert/encoder/layer_6/output/dense/bias"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_7/output/dense/kernel"
+ string_val: "bert/encoder/layer_7/output/dense/bias"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_8/output/dense/kernel"
+ string_val: "bert/encoder/layer_8/output/dense/bias"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_9/output/dense/kernel"
+ string_val: "bert/encoder/layer_9/output/dense/bias"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_10/output/dense/kernel"
+ string_val: "bert/encoder/layer_10/output/dense/bias"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_11/output/dense/kernel"
+ string_val: "bert/encoder/layer_11/output/dense/bias"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ string_val: "bert/pooler/dense/kernel"
+ string_val: "bert/pooler/dense/bias"
+ string_val: "output_weights"
+ string_val: "output_bias"
+ string_val: "bert/embeddings/word_embeddings/adam_m"
+ string_val: "bert/embeddings/word_embeddings/adam_v"
+ string_val: "bert/embeddings/token_type_embeddings/adam_m"
+ string_val: "bert/embeddings/token_type_embeddings/adam_v"
+ string_val: "bert/embeddings/position_embeddings/adam_m"
+ string_val: "bert/embeddings/position_embeddings/adam_v"
+ string_val: "bert/embeddings/LayerNorm/beta/adam_m"
+ string_val: "bert/embeddings/LayerNorm/beta/adam_v"
+ string_val: "bert/embeddings/LayerNorm/gamma/adam_m"
+ string_val: "bert/embeddings/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/pooler/dense/kernel/adam_m"
+ string_val: "bert/pooler/dense/kernel/adam_v"
+ string_val: "bert/pooler/dense/bias/adam_m"
+ string_val: "bert/pooler/dense/bias/adam_v"
+ string_val: "output_weights/adam_m"
+ string_val: "output_weights/adam_v"
+ string_val: "output_bias/adam_m"
+ string_val: "output_bias/adam_v"
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/Shape"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 604
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice/stack"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice/stack_1"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice/stack_2"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice"
+ op: "StridedSlice"
+ input: "report_uninitialized_variables/boolean_mask/Shape"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice/stack"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice/stack_1"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice/stack_2"
+ device: "/device:CPU:0"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/Prod/reduction_indices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/Prod"
+ op: "Prod"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice"
+ input: "report_uninitialized_variables/boolean_mask/Prod/reduction_indices"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/Shape_1"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 604
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_1"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_2"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice_1"
+ op: "StridedSlice"
+ input: "report_uninitialized_variables/boolean_mask/Shape_1"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_1"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_2"
+ device: "/device:CPU:0"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/Shape_2"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 604
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_1"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_2"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/strided_slice_2"
+ op: "StridedSlice"
+ input: "report_uninitialized_variables/boolean_mask/Shape_2"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_1"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_2"
+ device: "/device:CPU:0"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/concat/values_1"
+ op: "Pack"
+ input: "report_uninitialized_variables/boolean_mask/Prod"
+ device: "/device:CPU:0"
+ attr {
+ key: "N"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/concat/axis"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/concat"
+ op: "ConcatV2"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice_1"
+ input: "report_uninitialized_variables/boolean_mask/concat/values_1"
+ input: "report_uninitialized_variables/boolean_mask/strided_slice_2"
+ input: "report_uninitialized_variables/boolean_mask/concat/axis"
+ device: "/device:CPU:0"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/Reshape"
+ op: "Reshape"
+ input: "report_uninitialized_variables/Const"
+ input: "report_uninitialized_variables/boolean_mask/concat"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/Reshape_1/shape"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/Reshape_1"
+ op: "Reshape"
+ input: "report_uninitialized_variables/LogicalNot"
+ input: "report_uninitialized_variables/boolean_mask/Reshape_1/shape"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/Where"
+ op: "Where"
+ input: "report_uninitialized_variables/boolean_mask/Reshape_1"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/Squeeze"
+ op: "Squeeze"
+ input: "report_uninitialized_variables/boolean_mask/Where"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "squeeze_dims"
+ value {
+ list {
+ i: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/GatherV2/axis"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables/boolean_mask/GatherV2"
+ op: "GatherV2"
+ input: "report_uninitialized_variables/boolean_mask/Reshape"
+ input: "report_uninitialized_variables/boolean_mask/Squeeze"
+ input: "report_uninitialized_variables/boolean_mask/GatherV2/axis"
+ device: "/device:CPU:0"
+ attr {
+ key: "Taxis"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tindices"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "Tparams"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "batch_dims"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_resources/Const"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "concat/axis"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "concat"
+ op: "ConcatV2"
+ input: "report_uninitialized_variables/boolean_mask/GatherV2"
+ input: "report_uninitialized_resources/Const"
+ input: "concat/axis"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/VarIsInitializedOp"
+ op: "VarIsInitializedOp"
+ input: "global_step"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/word_embeddings"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_1"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/token_type_embeddings"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_2"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/position_embeddings"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_3"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_4"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_5"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_6"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_7"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_8"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_9"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_10"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_11"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_12"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_13"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_14"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_15"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_16"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_17"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_18"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_19"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_20"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_21"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_22"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_23"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_24"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_25"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_26"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_27"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_28"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_29"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_30"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_31"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_32"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_33"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_34"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_35"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_36"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_37"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_38"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_39"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_40"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_41"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_42"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_43"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_44"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_45"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_46"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_47"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_48"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_49"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_50"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_51"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_52"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_53"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_54"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_55"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_56"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_57"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_58"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_59"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_60"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_61"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_62"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_63"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_64"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_65"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_66"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_67"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_68"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_69"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_70"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_71"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_72"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_73"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_74"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_75"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_76"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_77"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_78"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_79"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_80"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_81"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_82"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_83"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_84"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_85"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_86"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_87"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_88"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_89"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_90"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_91"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_92"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_93"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_94"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_95"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_96"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_97"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_98"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_99"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_100"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_101"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_102"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_103"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_104"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_105"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_106"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_107"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_108"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_109"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_110"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_111"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_112"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_113"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_114"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_115"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_116"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_117"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_118"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_119"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_120"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_121"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_122"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_123"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_124"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_125"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_126"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_127"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_128"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_129"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_130"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_131"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_132"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_133"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_134"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_135"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_136"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_137"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_138"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_139"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_140"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_141"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_142"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_143"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_144"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_145"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_146"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_147"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_148"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_149"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_150"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_151"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_152"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_153"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_154"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_155"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_156"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_157"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_158"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_159"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_160"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_161"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_162"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_163"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_164"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_165"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_166"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_167"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_168"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_169"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_170"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_171"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_172"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_173"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_174"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_175"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_176"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_177"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_178"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_179"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_180"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_181"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_182"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_183"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_184"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_185"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_186"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_187"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_188"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_189"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_190"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_191"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_192"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_193"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_194"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_195"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_196"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_197"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/kernel"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_198"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_199"
+ op: "IsVariableInitialized"
+ input: "output_weights"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_200"
+ op: "IsVariableInitialized"
+ input: "output_bias"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_201"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/word_embeddings/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_202"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/word_embeddings/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_203"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/token_type_embeddings/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_204"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/token_type_embeddings/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_205"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/position_embeddings/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_206"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/position_embeddings/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_207"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_208"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_209"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_210"
+ op: "IsVariableInitialized"
+ input: "bert/embeddings/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_211"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_212"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_213"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_214"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_215"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_216"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_217"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_218"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_219"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_220"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_221"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_222"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_223"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_224"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_225"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_226"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_227"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_228"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_229"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_230"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_231"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_232"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_233"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_234"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_235"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_236"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_237"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_238"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_239"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_240"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_241"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_242"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_243"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_244"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_245"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_246"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_247"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_248"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_249"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_250"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_251"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_252"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_253"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_254"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_255"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_256"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_257"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_258"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_259"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_260"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_261"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_262"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_263"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_264"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_265"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_266"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_267"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_268"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_269"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_270"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_271"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_272"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_273"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_274"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_275"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_276"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_277"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_278"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_279"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_280"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_281"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_282"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_283"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_284"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_285"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_286"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_287"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_288"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_289"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_290"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_291"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_292"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_293"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_294"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_295"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_296"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_297"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_298"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_299"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_300"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_301"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_302"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_303"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_304"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_305"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_306"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_307"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_308"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_309"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_310"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_311"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_312"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_313"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_314"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_315"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_316"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_317"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_318"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_319"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_320"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_321"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_322"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_323"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_324"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_325"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_326"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_327"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_328"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_329"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_330"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_331"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_332"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_333"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_334"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_335"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_336"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_337"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_338"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_339"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_340"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_341"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_342"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_343"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_344"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_345"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_346"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_347"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_348"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_349"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_350"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_351"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_352"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_353"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_354"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_355"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_356"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_357"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_358"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_359"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_360"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_361"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_362"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_363"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_364"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_365"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_366"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_367"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_368"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_369"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_370"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_371"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_372"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_373"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_374"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_375"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_376"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_377"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_378"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_379"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_380"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_381"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_382"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_383"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_384"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_385"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_386"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_387"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_388"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_389"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_390"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_391"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_392"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_393"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_394"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_395"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_396"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_397"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_398"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_399"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_400"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_401"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_402"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_403"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_404"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_405"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_406"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_407"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_408"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_409"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_410"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_411"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_412"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_413"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_414"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_415"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_416"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_417"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_418"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_419"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_420"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_421"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_422"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_423"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_424"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_425"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_426"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_427"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_428"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_429"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_430"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_431"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_432"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_433"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_434"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_435"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_436"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_437"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_438"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_439"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_440"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_441"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_442"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_443"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_444"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_445"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_446"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_447"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_448"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_449"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_450"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_451"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_452"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_453"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_454"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_455"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_456"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_457"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_458"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_459"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_460"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_461"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_462"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_463"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_464"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_465"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_466"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_467"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_468"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_469"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_470"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_471"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_472"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_473"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_474"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_475"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_476"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_477"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_478"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_479"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_480"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_481"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_482"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_483"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_484"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_485"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_486"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_487"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_488"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_489"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_490"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_491"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_492"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_493"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_494"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_495"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_496"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_497"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_498"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_499"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_500"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_501"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_502"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_503"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_504"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_505"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_506"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_507"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_508"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_509"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_510"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_511"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_512"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_513"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_514"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_515"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_516"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_517"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_518"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_519"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_520"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_521"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_522"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_523"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_524"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_525"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_526"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_527"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_528"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_529"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_530"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_531"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_532"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_533"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_534"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_535"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_536"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_537"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_538"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_539"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_540"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_541"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_542"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_543"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_544"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_545"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_546"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_547"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_548"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_549"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_550"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_551"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_552"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_553"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_554"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_555"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_556"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_557"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_558"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_559"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_560"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_561"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_562"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_563"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_564"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_565"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_566"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_567"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_568"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_569"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_570"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_571"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_572"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_573"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_574"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_575"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_576"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_577"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_578"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_579"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_580"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_581"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_582"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_583"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_584"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_585"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_586"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_587"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_588"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_589"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_590"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_591"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_592"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_593"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_594"
+ op: "IsVariableInitialized"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_595"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/kernel/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_596"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/kernel/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_597"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_598"
+ op: "IsVariableInitialized"
+ input: "bert/pooler/dense/bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/pooler/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_599"
+ op: "IsVariableInitialized"
+ input: "output_weights/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_600"
+ op: "IsVariableInitialized"
+ input: "output_weights/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_weights/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_601"
+ op: "IsVariableInitialized"
+ input: "output_bias/adam_m"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/IsVariableInitialized_602"
+ op: "IsVariableInitialized"
+ input: "output_bias/adam_v"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@output_bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/stack"
+ op: "Pack"
+ input: "report_uninitialized_variables_1/VarIsInitializedOp"
+ input: "report_uninitialized_variables_1/IsVariableInitialized"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_1"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_2"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_3"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_4"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_5"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_6"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_7"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_8"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_9"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_10"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_11"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_12"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_13"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_14"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_15"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_16"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_17"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_18"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_19"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_20"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_21"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_22"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_23"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_24"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_25"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_26"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_27"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_28"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_29"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_30"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_31"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_32"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_33"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_34"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_35"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_36"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_37"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_38"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_39"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_40"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_41"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_42"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_43"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_44"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_45"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_46"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_47"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_48"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_49"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_50"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_51"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_52"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_53"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_54"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_55"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_56"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_57"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_58"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_59"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_60"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_61"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_62"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_63"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_64"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_65"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_66"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_67"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_68"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_69"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_70"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_71"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_72"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_73"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_74"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_75"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_76"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_77"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_78"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_79"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_80"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_81"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_82"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_83"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_84"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_85"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_86"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_87"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_88"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_89"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_90"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_91"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_92"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_93"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_94"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_95"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_96"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_97"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_98"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_99"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_100"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_101"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_102"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_103"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_104"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_105"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_106"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_107"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_108"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_109"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_110"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_111"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_112"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_113"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_114"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_115"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_116"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_117"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_118"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_119"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_120"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_121"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_122"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_123"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_124"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_125"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_126"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_127"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_128"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_129"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_130"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_131"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_132"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_133"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_134"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_135"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_136"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_137"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_138"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_139"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_140"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_141"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_142"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_143"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_144"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_145"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_146"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_147"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_148"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_149"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_150"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_151"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_152"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_153"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_154"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_155"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_156"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_157"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_158"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_159"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_160"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_161"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_162"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_163"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_164"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_165"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_166"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_167"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_168"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_169"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_170"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_171"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_172"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_173"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_174"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_175"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_176"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_177"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_178"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_179"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_180"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_181"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_182"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_183"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_184"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_185"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_186"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_187"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_188"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_189"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_190"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_191"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_192"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_193"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_194"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_195"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_196"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_197"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_198"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_199"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_200"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_201"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_202"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_203"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_204"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_205"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_206"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_207"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_208"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_209"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_210"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_211"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_212"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_213"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_214"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_215"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_216"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_217"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_218"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_219"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_220"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_221"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_222"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_223"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_224"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_225"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_226"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_227"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_228"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_229"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_230"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_231"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_232"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_233"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_234"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_235"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_236"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_237"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_238"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_239"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_240"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_241"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_242"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_243"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_244"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_245"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_246"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_247"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_248"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_249"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_250"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_251"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_252"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_253"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_254"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_255"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_256"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_257"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_258"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_259"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_260"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_261"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_262"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_263"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_264"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_265"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_266"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_267"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_268"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_269"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_270"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_271"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_272"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_273"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_274"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_275"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_276"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_277"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_278"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_279"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_280"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_281"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_282"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_283"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_284"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_285"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_286"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_287"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_288"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_289"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_290"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_291"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_292"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_293"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_294"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_295"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_296"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_297"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_298"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_299"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_300"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_301"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_302"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_303"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_304"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_305"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_306"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_307"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_308"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_309"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_310"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_311"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_312"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_313"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_314"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_315"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_316"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_317"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_318"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_319"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_320"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_321"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_322"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_323"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_324"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_325"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_326"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_327"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_328"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_329"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_330"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_331"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_332"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_333"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_334"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_335"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_336"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_337"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_338"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_339"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_340"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_341"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_342"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_343"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_344"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_345"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_346"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_347"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_348"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_349"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_350"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_351"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_352"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_353"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_354"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_355"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_356"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_357"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_358"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_359"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_360"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_361"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_362"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_363"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_364"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_365"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_366"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_367"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_368"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_369"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_370"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_371"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_372"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_373"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_374"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_375"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_376"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_377"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_378"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_379"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_380"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_381"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_382"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_383"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_384"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_385"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_386"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_387"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_388"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_389"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_390"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_391"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_392"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_393"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_394"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_395"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_396"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_397"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_398"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_399"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_400"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_401"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_402"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_403"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_404"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_405"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_406"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_407"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_408"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_409"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_410"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_411"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_412"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_413"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_414"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_415"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_416"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_417"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_418"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_419"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_420"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_421"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_422"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_423"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_424"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_425"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_426"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_427"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_428"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_429"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_430"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_431"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_432"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_433"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_434"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_435"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_436"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_437"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_438"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_439"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_440"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_441"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_442"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_443"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_444"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_445"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_446"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_447"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_448"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_449"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_450"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_451"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_452"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_453"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_454"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_455"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_456"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_457"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_458"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_459"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_460"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_461"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_462"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_463"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_464"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_465"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_466"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_467"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_468"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_469"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_470"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_471"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_472"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_473"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_474"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_475"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_476"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_477"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_478"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_479"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_480"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_481"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_482"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_483"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_484"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_485"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_486"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_487"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_488"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_489"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_490"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_491"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_492"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_493"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_494"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_495"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_496"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_497"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_498"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_499"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_500"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_501"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_502"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_503"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_504"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_505"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_506"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_507"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_508"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_509"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_510"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_511"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_512"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_513"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_514"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_515"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_516"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_517"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_518"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_519"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_520"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_521"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_522"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_523"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_524"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_525"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_526"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_527"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_528"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_529"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_530"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_531"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_532"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_533"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_534"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_535"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_536"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_537"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_538"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_539"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_540"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_541"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_542"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_543"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_544"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_545"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_546"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_547"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_548"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_549"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_550"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_551"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_552"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_553"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_554"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_555"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_556"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_557"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_558"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_559"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_560"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_561"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_562"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_563"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_564"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_565"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_566"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_567"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_568"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_569"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_570"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_571"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_572"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_573"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_574"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_575"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_576"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_577"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_578"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_579"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_580"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_581"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_582"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_583"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_584"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_585"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_586"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_587"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_588"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_589"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_590"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_591"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_592"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_593"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_594"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_595"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_596"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_597"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_598"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_599"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_600"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_601"
+ input: "report_uninitialized_variables_1/IsVariableInitialized_602"
+ device: "/device:CPU:0"
+ attr {
+ key: "N"
+ value {
+ i: 604
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/LogicalNot"
+ op: "LogicalNot"
+ input: "report_uninitialized_variables_1/stack"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/Const"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 604
+ }
+ }
+ string_val: "global_step"
+ string_val: "bert/embeddings/word_embeddings"
+ string_val: "bert/embeddings/token_type_embeddings"
+ string_val: "bert/embeddings/position_embeddings"
+ string_val: "bert/embeddings/LayerNorm/beta"
+ string_val: "bert/embeddings/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_0/output/dense/kernel"
+ string_val: "bert/encoder/layer_0/output/dense/bias"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_1/output/dense/kernel"
+ string_val: "bert/encoder/layer_1/output/dense/bias"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_2/output/dense/kernel"
+ string_val: "bert/encoder/layer_2/output/dense/bias"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_3/output/dense/kernel"
+ string_val: "bert/encoder/layer_3/output/dense/bias"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_4/output/dense/kernel"
+ string_val: "bert/encoder/layer_4/output/dense/bias"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_5/output/dense/kernel"
+ string_val: "bert/encoder/layer_5/output/dense/bias"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_6/output/dense/kernel"
+ string_val: "bert/encoder/layer_6/output/dense/bias"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_7/output/dense/kernel"
+ string_val: "bert/encoder/layer_7/output/dense/bias"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_8/output/dense/kernel"
+ string_val: "bert/encoder/layer_8/output/dense/bias"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_9/output/dense/kernel"
+ string_val: "bert/encoder/layer_9/output/dense/bias"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_10/output/dense/kernel"
+ string_val: "bert/encoder/layer_10/output/dense/bias"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_11/output/dense/kernel"
+ string_val: "bert/encoder/layer_11/output/dense/bias"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ string_val: "bert/pooler/dense/kernel"
+ string_val: "bert/pooler/dense/bias"
+ string_val: "output_weights"
+ string_val: "output_bias"
+ string_val: "bert/embeddings/word_embeddings/adam_m"
+ string_val: "bert/embeddings/word_embeddings/adam_v"
+ string_val: "bert/embeddings/token_type_embeddings/adam_m"
+ string_val: "bert/embeddings/token_type_embeddings/adam_v"
+ string_val: "bert/embeddings/position_embeddings/adam_m"
+ string_val: "bert/embeddings/position_embeddings/adam_v"
+ string_val: "bert/embeddings/LayerNorm/beta/adam_m"
+ string_val: "bert/embeddings/LayerNorm/beta/adam_v"
+ string_val: "bert/embeddings/LayerNorm/gamma/adam_m"
+ string_val: "bert/embeddings/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/pooler/dense/kernel/adam_m"
+ string_val: "bert/pooler/dense/kernel/adam_v"
+ string_val: "bert/pooler/dense/bias/adam_m"
+ string_val: "bert/pooler/dense/bias/adam_v"
+ string_val: "output_weights/adam_m"
+ string_val: "output_weights/adam_v"
+ string_val: "output_bias/adam_m"
+ string_val: "output_bias/adam_v"
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/Shape"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 604
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_1"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_2"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice"
+ op: "StridedSlice"
+ input: "report_uninitialized_variables_1/boolean_mask/Shape"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_1"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_2"
+ device: "/device:CPU:0"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/Prod/reduction_indices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/Prod"
+ op: "Prod"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice"
+ input: "report_uninitialized_variables_1/boolean_mask/Prod/reduction_indices"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "keep_dims"
+ value {
+ b: false
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/Shape_1"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 604
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_1"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_2"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1"
+ op: "StridedSlice"
+ input: "report_uninitialized_variables_1/boolean_mask/Shape_1"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_1"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_2"
+ device: "/device:CPU:0"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/Shape_2"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 604
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_1"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_2"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2"
+ op: "StridedSlice"
+ input: "report_uninitialized_variables_1/boolean_mask/Shape_2"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_1"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_2"
+ device: "/device:CPU:0"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/concat/values_1"
+ op: "Pack"
+ input: "report_uninitialized_variables_1/boolean_mask/Prod"
+ device: "/device:CPU:0"
+ attr {
+ key: "N"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/concat/axis"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/concat"
+ op: "ConcatV2"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1"
+ input: "report_uninitialized_variables_1/boolean_mask/concat/values_1"
+ input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2"
+ input: "report_uninitialized_variables_1/boolean_mask/concat/axis"
+ device: "/device:CPU:0"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/Reshape"
+ op: "Reshape"
+ input: "report_uninitialized_variables_1/Const"
+ input: "report_uninitialized_variables_1/boolean_mask/concat"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/Reshape_1/shape"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ int_val: -1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/Reshape_1"
+ op: "Reshape"
+ input: "report_uninitialized_variables_1/LogicalNot"
+ input: "report_uninitialized_variables_1/boolean_mask/Reshape_1/shape"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "Tshape"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/Where"
+ op: "Where"
+ input: "report_uninitialized_variables_1/boolean_mask/Reshape_1"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_BOOL
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/Squeeze"
+ op: "Squeeze"
+ input: "report_uninitialized_variables_1/boolean_mask/Where"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "squeeze_dims"
+ value {
+ list {
+ i: 1
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/GatherV2/axis"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "report_uninitialized_variables_1/boolean_mask/GatherV2"
+ op: "GatherV2"
+ input: "report_uninitialized_variables_1/boolean_mask/Reshape"
+ input: "report_uninitialized_variables_1/boolean_mask/Squeeze"
+ input: "report_uninitialized_variables_1/boolean_mask/GatherV2/axis"
+ device: "/device:CPU:0"
+ attr {
+ key: "Taxis"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tindices"
+ value {
+ type: DT_INT64
+ }
+ }
+ attr {
+ key: "Tparams"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "batch_dims"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "report_uninitialized_resources_1/Const"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "concat_1/axis"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "concat_1"
+ op: "ConcatV2"
+ input: "report_uninitialized_variables_1/boolean_mask/GatherV2"
+ input: "report_uninitialized_resources_1/Const"
+ input: "concat_1/axis"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ }
+ }
+ }
+ }
+}
+node {
+ name: "init_2"
+ op: "NoOp"
+}
+node {
+ name: "init_all_tables"
+ op: "NoOp"
+}
+node {
+ name: "init_3"
+ op: "NoOp"
+}
+node {
+ name: "group_deps_3"
+ op: "NoOp"
+ input: "^init_2"
+ input: "^init_3"
+ input: "^init_all_tables"
+}
+node {
+ name: "Merge/MergeSummary"
+ op: "MergeSummary"
+ input: "loss_1"
+ attr {
+ key: "N"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "save/filename/input"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "model"
+ }
+ }
+ }
+}
+node {
+ name: "save/filename"
+ op: "PlaceholderWithDefault"
+ input: "save/filename/input"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ }
+ }
+ }
+}
+node {
+ name: "save/Const"
+ op: "PlaceholderWithDefault"
+ input: "save/filename"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ }
+ }
+ }
+}
+node {
+ name: "save/StringJoin/inputs_1"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ }
+ string_val: "_temp_823f412404fa4e59893c248d5a436737/part"
+ }
+ }
+ }
+}
+node {
+ name: "save/StringJoin"
+ op: "StringJoin"
+ input: "save/Const"
+ input: "save/StringJoin/inputs_1"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "separator"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "save/num_shards"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 1
+ }
+ }
+ }
+}
+node {
+ name: "save/ShardedFilename/shard"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 0
+ }
+ }
+ }
+}
+node {
+ name: "save/ShardedFilename"
+ op: "ShardedFilename"
+ input: "save/StringJoin"
+ input: "save/ShardedFilename/shard"
+ input: "save/num_shards"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "save/SaveV2/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 604
+ }
+ }
+ string_val: "bert/embeddings/LayerNorm/beta"
+ string_val: "bert/embeddings/LayerNorm/beta/adam_m"
+ string_val: "bert/embeddings/LayerNorm/beta/adam_v"
+ string_val: "bert/embeddings/LayerNorm/gamma"
+ string_val: "bert/embeddings/LayerNorm/gamma/adam_m"
+ string_val: "bert/embeddings/LayerNorm/gamma/adam_v"
+ string_val: "bert/embeddings/position_embeddings"
+ string_val: "bert/embeddings/position_embeddings/adam_m"
+ string_val: "bert/embeddings/position_embeddings/adam_v"
+ string_val: "bert/embeddings/token_type_embeddings"
+ string_val: "bert/embeddings/token_type_embeddings/adam_m"
+ string_val: "bert/embeddings/token_type_embeddings/adam_v"
+ string_val: "bert/embeddings/word_embeddings"
+ string_val: "bert/embeddings/word_embeddings/adam_m"
+ string_val: "bert/embeddings/word_embeddings/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_0/output/dense/bias"
+ string_val: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/output/dense/kernel"
+ string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_1/output/dense/bias"
+ string_val: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/output/dense/kernel"
+ string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_10/output/dense/bias"
+ string_val: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/output/dense/kernel"
+ string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_11/output/dense/bias"
+ string_val: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/output/dense/kernel"
+ string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_2/output/dense/bias"
+ string_val: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/output/dense/kernel"
+ string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_3/output/dense/bias"
+ string_val: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/output/dense/kernel"
+ string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_4/output/dense/bias"
+ string_val: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/output/dense/kernel"
+ string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_5/output/dense/bias"
+ string_val: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/output/dense/kernel"
+ string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_6/output/dense/bias"
+ string_val: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/output/dense/kernel"
+ string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_7/output/dense/bias"
+ string_val: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/output/dense/kernel"
+ string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_8/output/dense/bias"
+ string_val: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/output/dense/kernel"
+ string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_9/output/dense/bias"
+ string_val: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/output/dense/kernel"
+ string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ string_val: "bert/pooler/dense/bias"
+ string_val: "bert/pooler/dense/bias/adam_m"
+ string_val: "bert/pooler/dense/bias/adam_v"
+ string_val: "bert/pooler/dense/kernel"
+ string_val: "bert/pooler/dense/kernel/adam_m"
+ string_val: "bert/pooler/dense/kernel/adam_v"
+ string_val: "global_step"
+ string_val: "output_bias"
+ string_val: "output_bias/adam_m"
+ string_val: "output_bias/adam_v"
+ string_val: "output_weights"
+ string_val: "output_weights/adam_m"
+ string_val: "output_weights/adam_v"
+ }
+ }
+ }
+}
+node {
+ name: "save/SaveV2/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 604
+ }
+ }
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "save/SaveV2"
+ op: "SaveV2"
+ input: "save/ShardedFilename"
+ input: "save/SaveV2/tensor_names"
+ input: "save/SaveV2/shape_and_slices"
+ input: "bert/embeddings/LayerNorm/beta"
+ input: "bert/embeddings/LayerNorm/beta/adam_m"
+ input: "bert/embeddings/LayerNorm/beta/adam_v"
+ input: "bert/embeddings/LayerNorm/gamma"
+ input: "bert/embeddings/LayerNorm/gamma/adam_m"
+ input: "bert/embeddings/LayerNorm/gamma/adam_v"
+ input: "bert/embeddings/position_embeddings"
+ input: "bert/embeddings/position_embeddings/adam_m"
+ input: "bert/embeddings/position_embeddings/adam_v"
+ input: "bert/embeddings/token_type_embeddings"
+ input: "bert/embeddings/token_type_embeddings/adam_m"
+ input: "bert/embeddings/token_type_embeddings/adam_v"
+ input: "bert/embeddings/word_embeddings"
+ input: "bert/embeddings/word_embeddings/adam_m"
+ input: "bert/embeddings/word_embeddings/adam_v"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_0/attention/output/dense/bias"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_0/attention/self/key/bias"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_0/attention/self/key/kernel"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_0/attention/self/query/bias"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_0/attention/self/query/kernel"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_0/attention/self/value/bias"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_0/attention/self/value/kernel"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_0/intermediate/dense/bias"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_0/output/dense/bias"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_0/output/dense/kernel"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_1/attention/output/dense/bias"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_1/attention/self/key/bias"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_1/attention/self/key/kernel"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_1/attention/self/query/bias"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_1/attention/self/query/kernel"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_1/attention/self/value/bias"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_1/attention/self/value/kernel"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_1/intermediate/dense/bias"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_1/output/dense/bias"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_1/output/dense/kernel"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_10/attention/output/dense/bias"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_10/attention/self/key/bias"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_10/attention/self/key/kernel"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_10/attention/self/query/bias"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_10/attention/self/query/kernel"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_10/attention/self/value/bias"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_10/attention/self/value/kernel"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_10/intermediate/dense/bias"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_10/output/dense/bias"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_10/output/dense/kernel"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_11/attention/output/dense/bias"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_11/attention/self/key/bias"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_11/attention/self/key/kernel"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_11/attention/self/query/bias"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_11/attention/self/query/kernel"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_11/attention/self/value/bias"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_11/attention/self/value/kernel"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_11/intermediate/dense/bias"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_11/output/dense/bias"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_11/output/dense/kernel"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_2/attention/output/dense/bias"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_2/attention/self/key/bias"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_2/attention/self/key/kernel"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_2/attention/self/query/bias"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_2/attention/self/query/kernel"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_2/attention/self/value/bias"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_2/attention/self/value/kernel"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_2/intermediate/dense/bias"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_2/output/dense/bias"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_2/output/dense/kernel"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_3/attention/output/dense/bias"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_3/attention/self/key/bias"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_3/attention/self/key/kernel"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_3/attention/self/query/bias"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_3/attention/self/query/kernel"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_3/attention/self/value/bias"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_3/attention/self/value/kernel"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_3/intermediate/dense/bias"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_3/output/dense/bias"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_3/output/dense/kernel"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_4/attention/output/dense/bias"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_4/attention/self/key/bias"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_4/attention/self/key/kernel"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_4/attention/self/query/bias"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_4/attention/self/query/kernel"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_4/attention/self/value/bias"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_4/attention/self/value/kernel"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_4/intermediate/dense/bias"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_4/output/dense/bias"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_4/output/dense/kernel"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_5/attention/output/dense/bias"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_5/attention/self/key/bias"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_5/attention/self/key/kernel"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_5/attention/self/query/bias"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_5/attention/self/query/kernel"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_5/attention/self/value/bias"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_5/attention/self/value/kernel"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_5/intermediate/dense/bias"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_5/output/dense/bias"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_5/output/dense/kernel"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_6/attention/output/dense/bias"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_6/attention/self/key/bias"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_6/attention/self/key/kernel"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_6/attention/self/query/bias"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_6/attention/self/query/kernel"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_6/attention/self/value/bias"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_6/attention/self/value/kernel"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_6/intermediate/dense/bias"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_6/output/dense/bias"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_6/output/dense/kernel"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_7/attention/output/dense/bias"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_7/attention/self/key/bias"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_7/attention/self/key/kernel"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_7/attention/self/query/bias"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_7/attention/self/query/kernel"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_7/attention/self/value/bias"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_7/attention/self/value/kernel"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_7/intermediate/dense/bias"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_7/output/dense/bias"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_7/output/dense/kernel"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_8/attention/output/dense/bias"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_8/attention/self/key/bias"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_8/attention/self/key/kernel"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_8/attention/self/query/bias"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_8/attention/self/query/kernel"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_8/attention/self/value/bias"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_8/attention/self/value/kernel"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_8/intermediate/dense/bias"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_8/output/dense/bias"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_8/output/dense/kernel"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_9/attention/output/dense/bias"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ input: "bert/encoder/layer_9/attention/self/key/bias"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ input: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ input: "bert/encoder/layer_9/attention/self/key/kernel"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ input: "bert/encoder/layer_9/attention/self/query/bias"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ input: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ input: "bert/encoder/layer_9/attention/self/query/kernel"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ input: "bert/encoder/layer_9/attention/self/value/bias"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ input: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ input: "bert/encoder/layer_9/attention/self/value/kernel"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ input: "bert/encoder/layer_9/intermediate/dense/bias"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ input: "bert/encoder/layer_9/output/dense/bias"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ input: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ input: "bert/encoder/layer_9/output/dense/kernel"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ input: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ input: "bert/pooler/dense/bias"
+ input: "bert/pooler/dense/bias/adam_m"
+ input: "bert/pooler/dense/bias/adam_v"
+ input: "bert/pooler/dense/kernel"
+ input: "bert/pooler/dense/kernel/adam_m"
+ input: "bert/pooler/dense/kernel/adam_v"
+ input: "global_step/Read/ReadVariableOp"
+ input: "output_bias"
+ input: "output_bias/adam_m"
+ input: "output_bias/adam_v"
+ input: "output_weights"
+ input: "output_weights/adam_m"
+ input: "output_weights/adam_v"
+ device: "/device:CPU:0"
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_INT64
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "save/control_dependency"
+ op: "Identity"
+ input: "save/ShardedFilename"
+ input: "^save/SaveV2"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@save/ShardedFilename"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "save/MergeV2Checkpoints/checkpoint_prefixes"
+ op: "Pack"
+ input: "save/ShardedFilename"
+ input: "^save/control_dependency"
+ device: "/device:CPU:0"
+ attr {
+ key: "N"
+ value {
+ i: 1
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: 0
+ }
+ }
+}
+node {
+ name: "save/MergeV2Checkpoints"
+ op: "MergeV2Checkpoints"
+ input: "save/MergeV2Checkpoints/checkpoint_prefixes"
+ input: "save/Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "delete_old_dirs"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Identity"
+ op: "Identity"
+ input: "save/Const"
+ input: "^save/MergeV2Checkpoints"
+ input: "^save/control_dependency"
+ device: "/device:CPU:0"
+ attr {
+ key: "T"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ }
+ }
+ }
+ }
+}
+node {
+ name: "save/RestoreV2/tensor_names"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 604
+ }
+ }
+ string_val: "bert/embeddings/LayerNorm/beta"
+ string_val: "bert/embeddings/LayerNorm/beta/adam_m"
+ string_val: "bert/embeddings/LayerNorm/beta/adam_v"
+ string_val: "bert/embeddings/LayerNorm/gamma"
+ string_val: "bert/embeddings/LayerNorm/gamma/adam_m"
+ string_val: "bert/embeddings/LayerNorm/gamma/adam_v"
+ string_val: "bert/embeddings/position_embeddings"
+ string_val: "bert/embeddings/position_embeddings/adam_m"
+ string_val: "bert/embeddings/position_embeddings/adam_v"
+ string_val: "bert/embeddings/token_type_embeddings"
+ string_val: "bert/embeddings/token_type_embeddings/adam_m"
+ string_val: "bert/embeddings/token_type_embeddings/adam_v"
+ string_val: "bert/embeddings/word_embeddings"
+ string_val: "bert/embeddings/word_embeddings/adam_m"
+ string_val: "bert/embeddings/word_embeddings/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_0/output/dense/bias"
+ string_val: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_0/output/dense/kernel"
+ string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_1/output/dense/bias"
+ string_val: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_1/output/dense/kernel"
+ string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_10/output/dense/bias"
+ string_val: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_10/output/dense/kernel"
+ string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_11/output/dense/bias"
+ string_val: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_11/output/dense/kernel"
+ string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_2/output/dense/bias"
+ string_val: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_2/output/dense/kernel"
+ string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_3/output/dense/bias"
+ string_val: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_3/output/dense/kernel"
+ string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_4/output/dense/bias"
+ string_val: "bert/encoder/layer_4/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_4/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_4/output/dense/kernel"
+ string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_5/output/dense/bias"
+ string_val: "bert/encoder/layer_5/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_5/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_5/output/dense/kernel"
+ string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_6/output/dense/bias"
+ string_val: "bert/encoder/layer_6/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_6/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_6/output/dense/kernel"
+ string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_7/output/dense/bias"
+ string_val: "bert/encoder/layer_7/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_7/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_7/output/dense/kernel"
+ string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_8/output/dense/bias"
+ string_val: "bert/encoder/layer_8/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_8/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_8/output/dense/kernel"
+ string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m"
+ string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v"
+ string_val: "bert/encoder/layer_9/output/dense/bias"
+ string_val: "bert/encoder/layer_9/output/dense/bias/adam_m"
+ string_val: "bert/encoder/layer_9/output/dense/bias/adam_v"
+ string_val: "bert/encoder/layer_9/output/dense/kernel"
+ string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m"
+ string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v"
+ string_val: "bert/pooler/dense/bias"
+ string_val: "bert/pooler/dense/bias/adam_m"
+ string_val: "bert/pooler/dense/bias/adam_v"
+ string_val: "bert/pooler/dense/kernel"
+ string_val: "bert/pooler/dense/kernel/adam_m"
+ string_val: "bert/pooler/dense/kernel/adam_v"
+ string_val: "global_step"
+ string_val: "output_bias"
+ string_val: "output_bias/adam_m"
+ string_val: "output_bias/adam_v"
+ string_val: "output_weights"
+ string_val: "output_weights/adam_m"
+ string_val: "output_weights/adam_v"
+ }
+ }
+ }
+}
+node {
+ name: "save/RestoreV2/shape_and_slices"
+ op: "Const"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 604
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_STRING
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_STRING
+ tensor_shape {
+ dim {
+ size: 604
+ }
+ }
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ string_val: ""
+ }
+ }
+ }
+}
+node {
+ name: "save/RestoreV2"
+ op: "RestoreV2"
+ input: "save/Const"
+ input: "save/RestoreV2/tensor_names"
+ input: "save/RestoreV2/shape_and_slices"
+ device: "/device:CPU:0"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ shape {
+ unknown_rank: true
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtypes"
+ value {
+ list {
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_INT64
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ type: DT_FLOAT
+ }
+ }
+ }
+}
+node {
+ name: "save/Assign"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/beta"
+ input: "save/RestoreV2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_1"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:1"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_2"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_3"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/gamma"
+ input: "save/RestoreV2:3"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_4"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:4"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_5"
+ op: "Assign"
+ input: "bert/embeddings/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:5"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_6"
+ op: "Assign"
+ input: "bert/embeddings/position_embeddings"
+ input: "save/RestoreV2:6"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_7"
+ op: "Assign"
+ input: "bert/embeddings/position_embeddings/adam_m"
+ input: "save/RestoreV2:7"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_8"
+ op: "Assign"
+ input: "bert/embeddings/position_embeddings/adam_v"
+ input: "save/RestoreV2:8"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/position_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 512
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_9"
+ op: "Assign"
+ input: "bert/embeddings/token_type_embeddings"
+ input: "save/RestoreV2:9"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_10"
+ op: "Assign"
+ input: "bert/embeddings/token_type_embeddings/adam_m"
+ input: "save/RestoreV2:10"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_11"
+ op: "Assign"
+ input: "bert/embeddings/token_type_embeddings/adam_v"
+ input: "save/RestoreV2:11"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/token_type_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_12"
+ op: "Assign"
+ input: "bert/embeddings/word_embeddings"
+ input: "save/RestoreV2:12"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_13"
+ op: "Assign"
+ input: "bert/embeddings/word_embeddings/adam_m"
+ input: "save/RestoreV2:13"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_14"
+ op: "Assign"
+ input: "bert/embeddings/word_embeddings/adam_v"
+ input: "save/RestoreV2:14"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/embeddings/word_embeddings/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 21128
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_15"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ input: "save/RestoreV2:15"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_16"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:16"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_17"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:17"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_18"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ input: "save/RestoreV2:18"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_19"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:19"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_20"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:20"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_21"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/bias"
+ input: "save/RestoreV2:21"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_22"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ input: "save/RestoreV2:22"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_23"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ input: "save/RestoreV2:23"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_24"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel"
+ input: "save/RestoreV2:24"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_25"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:25"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_26"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:26"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_27"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/bias"
+ input: "save/RestoreV2:27"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_28"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ input: "save/RestoreV2:28"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_29"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ input: "save/RestoreV2:29"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_30"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/kernel"
+ input: "save/RestoreV2:30"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_31"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ input: "save/RestoreV2:31"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_32"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ input: "save/RestoreV2:32"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_33"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/bias"
+ input: "save/RestoreV2:33"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_34"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ input: "save/RestoreV2:34"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_35"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ input: "save/RestoreV2:35"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_36"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/kernel"
+ input: "save/RestoreV2:36"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_37"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ input: "save/RestoreV2:37"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_38"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ input: "save/RestoreV2:38"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_39"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/bias"
+ input: "save/RestoreV2:39"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_40"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ input: "save/RestoreV2:40"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_41"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ input: "save/RestoreV2:41"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_42"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/kernel"
+ input: "save/RestoreV2:42"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_43"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ input: "save/RestoreV2:43"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_44"
+ op: "Assign"
+ input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ input: "save/RestoreV2:44"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_45"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/bias"
+ input: "save/RestoreV2:45"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_46"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ input: "save/RestoreV2:46"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_47"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ input: "save/RestoreV2:47"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_48"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel"
+ input: "save/RestoreV2:48"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_49"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ input: "save/RestoreV2:49"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_50"
+ op: "Assign"
+ input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ input: "save/RestoreV2:50"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_51"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta"
+ input: "save/RestoreV2:51"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_52"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:52"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_53"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:53"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_54"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma"
+ input: "save/RestoreV2:54"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_55"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:55"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_56"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:56"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_57"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/bias"
+ input: "save/RestoreV2:57"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_58"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_m"
+ input: "save/RestoreV2:58"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_59"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/bias/adam_v"
+ input: "save/RestoreV2:59"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_60"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/kernel"
+ input: "save/RestoreV2:60"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_61"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:61"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_62"
+ op: "Assign"
+ input: "bert/encoder/layer_0/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:62"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_63"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ input: "save/RestoreV2:63"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_64"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:64"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_65"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:65"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_66"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ input: "save/RestoreV2:66"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_67"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:67"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_68"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:68"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_69"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/bias"
+ input: "save/RestoreV2:69"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_70"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ input: "save/RestoreV2:70"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_71"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ input: "save/RestoreV2:71"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_72"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel"
+ input: "save/RestoreV2:72"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_73"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:73"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_74"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:74"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_75"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/bias"
+ input: "save/RestoreV2:75"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_76"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ input: "save/RestoreV2:76"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_77"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ input: "save/RestoreV2:77"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_78"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/kernel"
+ input: "save/RestoreV2:78"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_79"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ input: "save/RestoreV2:79"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_80"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ input: "save/RestoreV2:80"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_81"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/bias"
+ input: "save/RestoreV2:81"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_82"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ input: "save/RestoreV2:82"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_83"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ input: "save/RestoreV2:83"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_84"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/kernel"
+ input: "save/RestoreV2:84"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_85"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ input: "save/RestoreV2:85"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_86"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ input: "save/RestoreV2:86"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_87"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/bias"
+ input: "save/RestoreV2:87"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_88"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ input: "save/RestoreV2:88"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_89"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ input: "save/RestoreV2:89"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_90"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/kernel"
+ input: "save/RestoreV2:90"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_91"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ input: "save/RestoreV2:91"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_92"
+ op: "Assign"
+ input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ input: "save/RestoreV2:92"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_93"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/bias"
+ input: "save/RestoreV2:93"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_94"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ input: "save/RestoreV2:94"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_95"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ input: "save/RestoreV2:95"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_96"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel"
+ input: "save/RestoreV2:96"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_97"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ input: "save/RestoreV2:97"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_98"
+ op: "Assign"
+ input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ input: "save/RestoreV2:98"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_99"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta"
+ input: "save/RestoreV2:99"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_100"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:100"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_101"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:101"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_102"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma"
+ input: "save/RestoreV2:102"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_103"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:103"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_104"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:104"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_105"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/bias"
+ input: "save/RestoreV2:105"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_106"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_m"
+ input: "save/RestoreV2:106"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_107"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/bias/adam_v"
+ input: "save/RestoreV2:107"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_108"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/kernel"
+ input: "save/RestoreV2:108"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_109"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:109"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_110"
+ op: "Assign"
+ input: "bert/encoder/layer_1/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:110"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_111"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ input: "save/RestoreV2:111"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_112"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:112"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_113"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:113"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_114"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ input: "save/RestoreV2:114"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_115"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:115"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_116"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:116"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_117"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/bias"
+ input: "save/RestoreV2:117"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_118"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ input: "save/RestoreV2:118"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_119"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ input: "save/RestoreV2:119"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_120"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel"
+ input: "save/RestoreV2:120"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_121"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:121"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_122"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:122"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_123"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/bias"
+ input: "save/RestoreV2:123"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_124"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ input: "save/RestoreV2:124"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_125"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ input: "save/RestoreV2:125"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_126"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/kernel"
+ input: "save/RestoreV2:126"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_127"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ input: "save/RestoreV2:127"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_128"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ input: "save/RestoreV2:128"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_129"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/bias"
+ input: "save/RestoreV2:129"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_130"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ input: "save/RestoreV2:130"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_131"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ input: "save/RestoreV2:131"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_132"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/kernel"
+ input: "save/RestoreV2:132"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_133"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ input: "save/RestoreV2:133"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_134"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ input: "save/RestoreV2:134"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_135"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/bias"
+ input: "save/RestoreV2:135"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_136"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ input: "save/RestoreV2:136"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_137"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ input: "save/RestoreV2:137"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_138"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/kernel"
+ input: "save/RestoreV2:138"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_139"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ input: "save/RestoreV2:139"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_140"
+ op: "Assign"
+ input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ input: "save/RestoreV2:140"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_141"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/bias"
+ input: "save/RestoreV2:141"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_142"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ input: "save/RestoreV2:142"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_143"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ input: "save/RestoreV2:143"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_144"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel"
+ input: "save/RestoreV2:144"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_145"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ input: "save/RestoreV2:145"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_146"
+ op: "Assign"
+ input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ input: "save/RestoreV2:146"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_147"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta"
+ input: "save/RestoreV2:147"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_148"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:148"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_149"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:149"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_150"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma"
+ input: "save/RestoreV2:150"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_151"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:151"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_152"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:152"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_153"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/bias"
+ input: "save/RestoreV2:153"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_154"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_m"
+ input: "save/RestoreV2:154"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_155"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/bias/adam_v"
+ input: "save/RestoreV2:155"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_156"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/kernel"
+ input: "save/RestoreV2:156"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_157"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:157"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_158"
+ op: "Assign"
+ input: "bert/encoder/layer_10/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:158"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_159"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ input: "save/RestoreV2:159"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_160"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:160"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_161"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:161"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_162"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ input: "save/RestoreV2:162"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_163"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:163"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_164"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:164"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_165"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/bias"
+ input: "save/RestoreV2:165"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_166"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ input: "save/RestoreV2:166"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_167"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ input: "save/RestoreV2:167"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_168"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel"
+ input: "save/RestoreV2:168"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_169"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:169"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_170"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:170"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_171"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/bias"
+ input: "save/RestoreV2:171"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_172"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ input: "save/RestoreV2:172"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_173"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ input: "save/RestoreV2:173"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_174"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/kernel"
+ input: "save/RestoreV2:174"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_175"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ input: "save/RestoreV2:175"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_176"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ input: "save/RestoreV2:176"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_177"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/bias"
+ input: "save/RestoreV2:177"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_178"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ input: "save/RestoreV2:178"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_179"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ input: "save/RestoreV2:179"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_180"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/kernel"
+ input: "save/RestoreV2:180"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_181"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ input: "save/RestoreV2:181"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_182"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ input: "save/RestoreV2:182"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_183"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/bias"
+ input: "save/RestoreV2:183"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_184"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ input: "save/RestoreV2:184"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_185"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ input: "save/RestoreV2:185"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_186"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/kernel"
+ input: "save/RestoreV2:186"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_187"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ input: "save/RestoreV2:187"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_188"
+ op: "Assign"
+ input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ input: "save/RestoreV2:188"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_189"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/bias"
+ input: "save/RestoreV2:189"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_190"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ input: "save/RestoreV2:190"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_191"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ input: "save/RestoreV2:191"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_192"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel"
+ input: "save/RestoreV2:192"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_193"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ input: "save/RestoreV2:193"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_194"
+ op: "Assign"
+ input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ input: "save/RestoreV2:194"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_195"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta"
+ input: "save/RestoreV2:195"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_196"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:196"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_197"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:197"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_198"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma"
+ input: "save/RestoreV2:198"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_199"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:199"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_200"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:200"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_201"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/bias"
+ input: "save/RestoreV2:201"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_202"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_m"
+ input: "save/RestoreV2:202"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_203"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/bias/adam_v"
+ input: "save/RestoreV2:203"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_204"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/kernel"
+ input: "save/RestoreV2:204"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_205"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:205"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_206"
+ op: "Assign"
+ input: "bert/encoder/layer_11/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:206"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_207"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ input: "save/RestoreV2:207"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_208"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:208"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_209"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:209"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_210"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ input: "save/RestoreV2:210"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_211"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:211"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_212"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:212"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_213"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/bias"
+ input: "save/RestoreV2:213"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_214"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ input: "save/RestoreV2:214"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_215"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ input: "save/RestoreV2:215"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_216"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel"
+ input: "save/RestoreV2:216"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_217"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:217"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_218"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:218"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_219"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/bias"
+ input: "save/RestoreV2:219"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_220"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ input: "save/RestoreV2:220"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_221"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ input: "save/RestoreV2:221"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_222"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/kernel"
+ input: "save/RestoreV2:222"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_223"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ input: "save/RestoreV2:223"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_224"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ input: "save/RestoreV2:224"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_225"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/bias"
+ input: "save/RestoreV2:225"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_226"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ input: "save/RestoreV2:226"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_227"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ input: "save/RestoreV2:227"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_228"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/kernel"
+ input: "save/RestoreV2:228"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_229"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ input: "save/RestoreV2:229"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_230"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ input: "save/RestoreV2:230"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_231"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/bias"
+ input: "save/RestoreV2:231"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_232"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ input: "save/RestoreV2:232"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_233"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ input: "save/RestoreV2:233"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_234"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/kernel"
+ input: "save/RestoreV2:234"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_235"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ input: "save/RestoreV2:235"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_236"
+ op: "Assign"
+ input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ input: "save/RestoreV2:236"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_237"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/bias"
+ input: "save/RestoreV2:237"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_238"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ input: "save/RestoreV2:238"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_239"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ input: "save/RestoreV2:239"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_240"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel"
+ input: "save/RestoreV2:240"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_241"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ input: "save/RestoreV2:241"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_242"
+ op: "Assign"
+ input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ input: "save/RestoreV2:242"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_243"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta"
+ input: "save/RestoreV2:243"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_244"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:244"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_245"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:245"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_246"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma"
+ input: "save/RestoreV2:246"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_247"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:247"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_248"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:248"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_249"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/bias"
+ input: "save/RestoreV2:249"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_250"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_m"
+ input: "save/RestoreV2:250"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_251"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/bias/adam_v"
+ input: "save/RestoreV2:251"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_252"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/kernel"
+ input: "save/RestoreV2:252"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_253"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:253"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_254"
+ op: "Assign"
+ input: "bert/encoder/layer_2/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:254"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_255"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ input: "save/RestoreV2:255"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_256"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:256"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_257"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:257"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_258"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ input: "save/RestoreV2:258"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_259"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:259"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_260"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:260"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_261"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/bias"
+ input: "save/RestoreV2:261"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_262"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ input: "save/RestoreV2:262"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_263"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ input: "save/RestoreV2:263"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_264"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel"
+ input: "save/RestoreV2:264"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_265"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:265"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_266"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:266"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_267"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/bias"
+ input: "save/RestoreV2:267"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_268"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ input: "save/RestoreV2:268"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_269"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ input: "save/RestoreV2:269"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_270"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/kernel"
+ input: "save/RestoreV2:270"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_271"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ input: "save/RestoreV2:271"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_272"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ input: "save/RestoreV2:272"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_273"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/bias"
+ input: "save/RestoreV2:273"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_274"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ input: "save/RestoreV2:274"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_275"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ input: "save/RestoreV2:275"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_276"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/kernel"
+ input: "save/RestoreV2:276"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_277"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ input: "save/RestoreV2:277"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_278"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ input: "save/RestoreV2:278"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_279"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/bias"
+ input: "save/RestoreV2:279"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_280"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ input: "save/RestoreV2:280"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_281"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ input: "save/RestoreV2:281"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_282"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/kernel"
+ input: "save/RestoreV2:282"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_283"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ input: "save/RestoreV2:283"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_284"
+ op: "Assign"
+ input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ input: "save/RestoreV2:284"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_285"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/bias"
+ input: "save/RestoreV2:285"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_286"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ input: "save/RestoreV2:286"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_287"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ input: "save/RestoreV2:287"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_288"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel"
+ input: "save/RestoreV2:288"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_289"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ input: "save/RestoreV2:289"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_290"
+ op: "Assign"
+ input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ input: "save/RestoreV2:290"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_291"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta"
+ input: "save/RestoreV2:291"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_292"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:292"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_293"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:293"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_294"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma"
+ input: "save/RestoreV2:294"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_295"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:295"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_296"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:296"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_297"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/bias"
+ input: "save/RestoreV2:297"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_298"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_m"
+ input: "save/RestoreV2:298"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_299"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/bias/adam_v"
+ input: "save/RestoreV2:299"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_300"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/kernel"
+ input: "save/RestoreV2:300"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_301"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:301"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_302"
+ op: "Assign"
+ input: "bert/encoder/layer_3/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:302"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_303"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ input: "save/RestoreV2:303"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_304"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ input: "save/RestoreV2:304"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_305"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ input: "save/RestoreV2:305"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_306"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ input: "save/RestoreV2:306"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_307"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ input: "save/RestoreV2:307"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_308"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ input: "save/RestoreV2:308"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_309"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/bias"
+ input: "save/RestoreV2:309"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_310"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ input: "save/RestoreV2:310"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_311"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ input: "save/RestoreV2:311"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_312"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel"
+ input: "save/RestoreV2:312"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_313"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ input: "save/RestoreV2:313"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_314"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ input: "save/RestoreV2:314"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_315"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/bias"
+ input: "save/RestoreV2:315"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_316"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ input: "save/RestoreV2:316"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_317"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ input: "save/RestoreV2:317"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_318"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/kernel"
+ input: "save/RestoreV2:318"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_319"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ input: "save/RestoreV2:319"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_320"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ input: "save/RestoreV2:320"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_321"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/bias"
+ input: "save/RestoreV2:321"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_322"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ input: "save/RestoreV2:322"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_323"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ input: "save/RestoreV2:323"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_324"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/kernel"
+ input: "save/RestoreV2:324"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_325"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ input: "save/RestoreV2:325"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_326"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ input: "save/RestoreV2:326"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_327"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/bias"
+ input: "save/RestoreV2:327"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_328"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ input: "save/RestoreV2:328"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_329"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ input: "save/RestoreV2:329"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_330"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/kernel"
+ input: "save/RestoreV2:330"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_331"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ input: "save/RestoreV2:331"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_332"
+ op: "Assign"
+ input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ input: "save/RestoreV2:332"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 768
+ }
+ dim {
+ size: 768
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_333"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/bias"
+ input: "save/RestoreV2:333"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_334"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ input: "save/RestoreV2:334"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_335"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ input: "save/RestoreV2:335"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v"
+ }
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 3072
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "save/Assign_336"
+ op: "Assign"
+ input: "bert/encoder/layer_4/intermediate/dense/kernel"
+ input: "save/RestoreV2:336"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@bert/encoder/layer_4/inte